summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c67
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c83
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c50
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c35
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_pm.c7
16 files changed, 264 insertions, 64 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ee9d2bcd2c13..ef7bc41ffffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
GEM_BUG_ON(rq->hw_context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
- err = mutex_lock_interruptible_nested(&tl->mutex,
- SINGLE_DEPTH_NESTING);
- if (err)
- return err;
+ /*
+ * Ideally, we just want to insert our foreign fence as
+ * a barrier into the remove context, such that this operation
+ * occurs after all current operations in that context, and
+ * all future operations must occur after this.
+ *
+ * Currently, the timeline->last_request tracking is guarded
+ * by its mutex and so we must obtain that to atomically
+ * insert our barrier. However, since we already hold our
+ * timeline->mutex, we must be careful against potential
+ * inversion if we are the kernel_context as the remote context
+ * will itself poke at the kernel_context when it needs to
+ * unpin. Ergo, if already locked, we drop both locks and
+ * try again (through the magic of userspace repeating EAGAIN).
+ */
+ if (!mutex_trylock(&tl->mutex))
+ return -EAGAIN;
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index bc3b72bfa9e3..01765a7ec18f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- GEM_BUG_ON(execlists->active - execlists->inflight >
- execlists_num_ports(execlists));
- return READ_ONCE(*execlists->active);
+ return *READ_ONCE(execlists->active);
}
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5ca3ec911e50..813bd3a610d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
#include "i915_drv.h"
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"
@@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
@@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
+ intel_engine_fini_retire(engine);
intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 874d82677179..c1dd0cd3efc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+static void
+__queue_and_release_pm(struct i915_request *rq,
+ struct intel_timeline *tl,
+ struct intel_engine_cs *engine)
+{
+ struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * We have to serialise all potential retirement paths with our
+ * submission, as we don't want to underflow either the
+ * engine->wakeref.counter or our timeline->active_count.
+ *
+ * Equally, we cannot allow a new submission to start until
+ * after we finish queueing, nor could we allow that submitter
+ * to retire us before we are ready!
+ */
+ spin_lock(&timelines->lock);
+
+ /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+
+ /* Hand the request over to HW and so engine_retire() */
+ __i915_request_queue(rq, NULL);
+
+ /* Let new submissions commence (and maybe retire this timeline) */
+ __intel_wakeref_defer_park(&engine->wakeref);
+
+ spin_unlock(&timelines->lock);
+}
+
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
+ struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
@@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
+ *
+ * For unlocking, there are 2 other parties and the GPU who have a
+ * stake here.
+ *
+ * A new gpu user will be waiting on the engine-pm to start their
+ * engine_unpark. New waiters are predicated on engine->wakeref.count
+ * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+ * engine->wakeref.
+ *
+ * The other party is intel_gt_retire_requests(), which is walking the
+ * list of active timelines looking for completions. Meanwhile as soon
+ * as we call __i915_request_queue(), the GPU may complete our request.
+ * Ergo, if we put ourselves on the timelines.active_list
+ * (se intel_timeline_enter()) before we increment the
+ * engine->wakeref.count, we may see the request completion and retire
+ * it causing an undeflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(engine->kernel_context);
+ flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
- rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(i915_request_timeline(rq));
-
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
@@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_commit(rq);
- /* Release our exclusive hold on the engine */
- __intel_wakeref_defer_park(&engine->wakeref);
- __i915_request_queue(rq, NULL);
+ /* Expose ourselves to the world */
+ __queue_and_release_pm(rq, ce->timeline, engine);
result = false;
out_unlock:
- __timeline_mark_unlock(engine->kernel_context, flags);
+ __timeline_mark_unlock(ce, flags);
return result;
}
@@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->gt);
+ /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..24e20344dc22 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 758f0e8ec672..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,6 +451,14 @@ struct intel_engine_cs {
struct intel_engine_execlists execlists;
+ /*
+ * Keep track of completed timelines on this engine for early
+ * retirement with the goal of quickly enabling powersaving as
+ * soon as the engine is idle.
+ */
+ struct intel_timeline *retire;
+ struct work_struct retire_work;
+
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 6187cdd06646..a459a42ad5c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
- .flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
@@ -272,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
static suspend_state_t pm_suspend_target(void)
{
-#if IS_ENABLED(CONFIG_PM_SLEEP)
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
return pm_suspend_target_state;
#else
return PM_SUSPEND_TO_IDLE;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b3e17399be9b..990efc27a4e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(&gt->wakeref);
}
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(&gt->wakeref);
+}
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 353809ac2754..3dc13ecf41bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/workqueue.h>
+
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
@@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt)
intel_engine_flush_submission(engine);
}
+static void engine_retire(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), retire_work);
+ struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+ do {
+ struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+ /*
+ * Our goal here is to retire _idle_ timelines as soon as
+ * possible (as they are idle, we do not expect userspace
+ * to be cleaning up anytime soon).
+ *
+ * If the timeline is currently locked, either it is being
+ * retired elsewhere or about to be!
+ */
+ if (mutex_trylock(&tl->mutex)) {
+ retire_requests(tl);
+ mutex_unlock(&tl->mutex);
+ }
+ intel_timeline_put(tl);
+
+ GEM_BUG_ON(!next);
+ tl = ptr_mask_bits(next, 1);
+ } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ struct intel_timeline *first;
+
+ /*
+ * We open-code a llist here to include the additional tag [BIT(0)]
+ * so that we know when the timeline is already on a
+ * retirement queue: either this engine or another.
+ *
+ * However, we rely on that a timeline can only be active on a single
+ * engine at any one time and that add_retire() is called before the
+ * engine releases the timeline and transferred to another to retire.
+ */
+
+ if (READ_ONCE(tl->retire)) /* already queued */
+ return false;
+
+ intel_timeline_get(tl);
+ first = READ_ONCE(engine->retire);
+ do
+ tl->retire = ptr_pack_bits(first, 1, 1);
+ while (!try_cmpxchg(&engine->retire, &first, tl));
+
+ return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ if (add_retire(engine, tl))
+ schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+ INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+ flush_work(&engine->retire_work);
+ GEM_BUG_ON(engine->retire);
+}
+
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
intel_timeline_get(tl);
- GEM_BUG_ON(!tl->active_count);
- tl->active_count++; /* pin the list element */
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ atomic_inc(&tl->active_count); /* pin the list element */
spin_unlock_irqrestore(&timelines->lock, flags);
if (timeout > 0) {
@@ -74,7 +149,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
- if (!--tl->active_count)
+ if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
else
active_count += !!rcu_access_pointer(tl->last_request.fence);
@@ -83,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
- GEM_BUG_ON(tl->active_count);
+ GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index bd31cbce47e0..d626fb115386 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,9 @@
#ifndef INTEL_GT_REQUESTS_H
#define INTEL_GT_REQUESTS_H
+struct intel_engine_cs;
struct intel_gt;
+struct intel_timeline;
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
intel_gt_retire_requests_timeout(gt, 0);
}
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0ac3b26674ad..9fdefbdc3546 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -1115,9 +1116,17 @@ __execlists_schedule_out(struct i915_request *rq,
* refrain from doing non-trivial work here.
*/
+ /*
+ * If we have just completed this context, the engine may now be
+ * idle and we want to re-enter powersaving.
+ */
+ if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ i915_request_completed(rq))
+ intel_engine_add_retire(engine, ce->timeline);
+
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- intel_gt_pm_put(engine->gt);
+ intel_gt_pm_put_async(engine->gt);
/*
* If this is part of a virtual engine, its next request may
@@ -1937,16 +1946,17 @@ skip_submit:
static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct i915_request * const *port, *rq;
+ struct i915_request * const *port;
- for (port = execlists->pending; (rq = *port); port++)
- execlists_schedule_out(rq);
+ for (port = execlists->pending; *port; port++)
+ execlists_schedule_out(*port);
memset(execlists->pending, 0, sizeof(execlists->pending));
- for (port = execlists->active; (rq = *port); port++)
- execlists_schedule_out(rq);
- execlists->active =
- memset(execlists->inflight, 0, sizeof(execlists->inflight));
+ /* Mark the end of active before we overwrite *active */
+ for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+ execlists_schedule_out(*port);
+ WRITE_ONCE(execlists->active,
+ memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}
static inline void
@@ -2099,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+ struct i915_request * const *old = execlists->active;
+
+ /* Point active to the new ELSP; prevent overwriting */
+ WRITE_ONCE(execlists->active, execlists->pending);
+ set_timeslice(engine);
+
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
/* cancel old inflight, prepare for switch */
- trace_ports(execlists, "preempted", execlists->active);
- while (*execlists->active)
- execlists_schedule_out(*execlists->active++);
+ trace_ports(execlists, "preempted", old);
+ while (*old)
+ execlists_schedule_out(*old++);
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- execlists->active =
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
-
- set_timeslice(engine);
+ WRITE_ONCE(execlists->active,
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending)));
WRITE_ONCE(execlists->pending[0], NULL);
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index f03e000051c1..c97423a76642 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index ece20504d240..374b28f13ca0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring)
i915_vma_make_unshrinkable(vma);
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->emit);
+ ring->vaddr = addr;
return 0;
err_ring:
@@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring)
if (!atomic_dec_and_test(&ring->pin_count))
return;
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->emit);
-
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
+ i915_vma_unpin(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 14ad10acd548..649798c184fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
+ GEM_BUG_ON(timeline->retire);
if (timeline->hwsp_cacheline)
cacheline_free(timeline->hwsp_cacheline);
@@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /*
+ * Pretend we are serialised by the timeline->mutex.
+ *
+ * While generally true, there are a few exceptions to the rule
+ * for the engine->kernel_context being used to manage power
+ * transitions. As the engine_park may be called from under any
+ * timeline, it uses the power mutex as a global serialisation
+ * lock to prevent any other request entering its timeline.
+ *
+ * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+ *
+ * However, intel_gt_retire_request() does not know which engine
+ * it is retiring along and so cannot partake in the engine-pm
+ * barrier, and there we use the tl->active_count as a means to
+ * pin the timeline in the active_list while the locks are dropped.
+ * Ergo, as that is outside of the engine-pm barrier, we need to
+ * use atomic to manipulate tl->active_count.
+ */
lockdep_assert_held(&tl->mutex);
-
GEM_BUG_ON(!atomic_read(&tl->pin_count));
- if (tl->active_count++)
+
+ if (atomic_add_unless(&tl->active_count, 1, 0))
return;
- GEM_BUG_ON(!tl->active_count); /* overflow? */
spin_lock_irqsave(&timelines->lock, flags);
- list_add(&tl->link, &timelines->active_list);
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
spin_unlock_irqrestore(&timelines->lock, flags);
}
@@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
- GEM_BUG_ON(!tl->active_count);
- if (--tl->active_count)
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ if (atomic_add_unless(&tl->active_count, -1, 1))
return;
spin_lock_irqsave(&timelines->lock, flags);
- list_del(&tl->link);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
spin_unlock_irqrestore(&timelines->lock, flags);
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 98d9ee166379..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -42,7 +42,7 @@ struct intel_timeline {
* from the intel_context caller plus internal atomicity.
*/
atomic_t pin_count;
- unsigned int active_count;
+ atomic_t active_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
@@ -66,6 +66,9 @@ struct intel_timeline {
*/
struct i915_active_fence last_request;
+ /** A chain of completed timelines ready for early retirement. */
+ struct intel_timeline *retire;
+
/**
* We track the most recent seqno that we wait on in every context so
* that we only have to emit a new await and dependency on a more
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..cbf6b0735272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
- intel_engine_pm_put(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
p->critical_section_end();
- /* engine wakeref is sync (instant) */
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);