summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_reset.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_reset.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c142
1 files changed, 90 insertions, 52 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c97423a76642..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -21,6 +21,7 @@
#include "intel_reset.h"
#include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3
@@ -40,27 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
static void engine_skip_context(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
+ struct intel_context *hung_ctx = rq->context;
if (!i915_request_is_active(rq))
return;
lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->gem_context == hung_ctx)
+ if (rq->context == hung_ctx)
i915_request_skip(rq, -EIO);
}
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
- unsigned int score;
+ struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
+ unsigned int score;
+
+ if (IS_ERR_OR_NULL(file_priv))
+ return;
- if (i915_gem_context_is_banned(ctx))
+ score = 0;
+ if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -75,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
}
}
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
{
+ struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (!ctx)
+ return false;
+
+ if (i915_gem_context_is_closed(ctx)) {
+ intel_context_set_banned(rq->context);
+ banned = true;
+ goto out;
+ }
+
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
- if (!i915_gem_context_is_bannable(ctx))
- return false;
+ if (!i915_gem_context_is_bannable(ctx)) {
+ banned = false;
+ goto out;
+ }
+
+ dev_notice(ctx->i915->drm.dev,
+ "%s context reset due to GPU hang\n",
+ ctx->name);
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
@@ -100,38 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
+ intel_context_set_banned(rq->context);
}
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
+ client_mark_guilty(ctx, banned);
+out:
+ i915_gem_context_put(ctx);
return banned;
}
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
{
- atomic_inc(&ctx->active_count);
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ atomic_inc(&ctx->active_count);
+ rcu_read_unlock();
}
void __i915_request_reset(struct i915_request *rq, bool guilty)
{
- GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
- rq->engine->name,
- rq->fence.context,
- rq->fence.seqno,
- yesno(guilty));
+ RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
GEM_BUG_ON(i915_request_completed(rq));
+ rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
+ if (mark_guilty(rq))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
+ mark_innocent(rq);
}
+ rcu_read_unlock();
}
static bool i915_in_reset(struct pci_dev *pdev)
@@ -218,9 +247,8 @@ out:
return ret;
}
-static int ironlake_do_reset(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_uncore *uncore = gt->uncore;
int ret;
@@ -564,7 +592,7 @@ static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
else if (INTEL_GEN(i915) >= 6)
return gen6_reset_engines;
else if (INTEL_GEN(i915) >= 5)
- return ironlake_do_reset;
+ return ilk_do_reset;
else if (IS_G4X(i915))
return g4x_do_reset;
else if (IS_G33(i915) || IS_PINEVIEW(i915))
@@ -592,7 +620,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- GEM_TRACE("engine_mask=%x\n", engine_mask);
+ GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
preempt_disable();
ret = reset(gt, engine_mask, retry);
preempt_enable();
@@ -647,7 +675,8 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset.
*/
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
}
static void revoke_mmaps(struct intel_gt *gt)
@@ -667,8 +696,13 @@ static void revoke_mmaps(struct intel_gt *gt)
continue;
GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
- node = &vma->obj->base.vma_node;
+
+ if (!vma->mmo)
+ continue;
+
+ node = &vma->mmo->vma_node;
vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+
unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
drm_vma_node_offset_addr(node) + vma_offset,
vma->size,
@@ -722,10 +756,11 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
static void reset_finish_engine(struct intel_engine_cs *engine)
{
- engine->reset.finish(engine);
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_breadcrumbs_irq(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
@@ -745,8 +780,7 @@ static void nop_submit_request(struct i915_request *request)
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
- GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
- engine->name, request->fence.context, request->fence.seqno);
+ RQ_TRACE(request, "-EIO\n");
dma_fence_set_error(&request->fence, -EIO);
spin_lock_irqsave(&engine->active.lock, flags);
@@ -754,7 +788,7 @@ static void nop_submit_request(struct i915_request *request)
i915_request_mark_complete(request);
spin_unlock_irqrestore(&engine->active.lock, flags);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
static void __intel_gt_set_wedged(struct intel_gt *gt)
@@ -773,7 +807,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* First, stop submission to hw, but do not yet complete requests by
@@ -799,11 +833,12 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
/* Mark all executing requests as skipped */
for_each_engine(engine, gt, id)
- engine->cancel_requests(engine);
+ if (engine->reset.cancel)
+ engine->reset.cancel(engine);
reset_finish(gt, awake);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
}
void intel_gt_set_wedged(struct intel_gt *gt)
@@ -820,7 +855,6 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl;
- unsigned long flags;
bool ok;
if (!test_bit(I915_WEDGED, &gt->reset.flags))
@@ -830,7 +864,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
return false;
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* Before unwedging, make sure that all pending operations
@@ -842,7 +876,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*
* No more can be submitted until we reset the wedged bit.
*/
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
list_for_each_entry(tl, &timelines->active_list, link) {
struct dma_fence *fence;
@@ -850,7 +884,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
if (!fence)
continue;
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/*
* All internal dependencies (i915_requests) will have
@@ -863,10 +897,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
dma_fence_put(fence);
/* Restart iteration after droping lock */
- spin_lock_irqsave(&timelines->lock, flags);
+ spin_lock(&timelines->lock);
tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
- spin_unlock_irqrestore(&timelines->lock, flags);
+ spin_unlock(&timelines->lock);
/* We must reset pending GPU events before restoring our submission */
ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
@@ -892,7 +926,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*/
intel_engines_reset_default_submission(gt);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
clear_bit(I915_WEDGED, &gt->reset.flags);
@@ -967,7 +1001,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_engine_mask_t awake;
int ret;
- GEM_TRACE("flags=%lx\n", gt->reset.flags);
+ GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);
might_sleep();
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
@@ -1070,9 +1104,10 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
struct intel_gt *gt = engine->gt;
+ bool uses_guc = intel_engine_in_guc_submission_mode(engine);
int ret;
- GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
+ ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
if (!intel_engine_pm_get_if_awake(engine))
@@ -1085,14 +1120,14 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
"Resetting %s for %s\n", engine->name, msg);
atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
- if (!engine->gt->uc.guc.execbuf_client)
+ if (!uses_guc)
ret = intel_gt_reset_engine(engine);
else
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->gt->uc.guc.execbuf_client ? "GuC " : "",
+ uses_guc ? "GuC " : "",
engine->name, ret);
goto out;
}
@@ -1195,7 +1230,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(gt->i915, engine_mask, msg);
+ i915_capture_error_state(gt->i915);
intel_gt_clear_error_registers(gt, engine_mask);
}
@@ -1288,10 +1323,10 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
if (!intel_gt_is_wedged(gt))
return 0;
- /* Reset still in progress? Maybe we will recover? */
- if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
+ if (intel_gt_has_init_error(gt))
return -EIO;
+ /* Reset still in progress? Maybe we will recover? */
if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
&gt->reset.flags)))
@@ -1313,6 +1348,9 @@ void intel_gt_init_reset(struct intel_gt *gt)
init_waitqueue_head(&gt->reset.queue);
mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu);
+
+ /* no GPU until we are ready! */
+ __set_bit(I915_WEDGED, &gt->reset.flags);
}
void intel_gt_fini_reset(struct intel_gt *gt)