From b300fde8965fdd628341c4b602481ebde8ac9cb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Feb 2019 09:49:21 +0000 Subject: drm/i915: Remove i915_request.global_seqno Having weaned the interrupt handling off using a single global execution queue, we no longer need to emit a global_seqno. Note that we still have a few assumptions about execution order along engine timelines, but this removes the most obvious artefact! Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8bc8aa54aa35..20cbceeabeae 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -535,7 +535,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, engine->guc_id, ctx_desc, - ring_tail, rq->global_seqno); + ring_tail, rq->fence.seqno); guc_ring_doorbell(client); client->submissions[engine->id] += 1; -- cgit v1.2.3 From 32eb6bcfdda9dad240cf6a22fda2b3418b1a1b8e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Feb 2019 10:20:33 +0000 Subject: drm/i915: Make request allocation caches global As kmem_caches share the same properties (size, allocation/free behaviour) for all potential devices, we can use global caches. While this potential has worse fragmentation behaviour (one can argue that different devices would have different activity lifetimes, but you can also argue that activity is temporal across the system) it is the default behaviour of the system at large to amalgamate matching caches. The benefit for us is much reduced pointer dancing along the frequent allocation paths. v2: Defer shrinking until after a global grace period for futureproofing multiple consumers of the slab caches, similar to the current strategy for avoiding shrinking too early. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190228102035.5857-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 20cbceeabeae..4366db7978a8 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -781,8 +781,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) } rb_erase_cached(&p->node, &execlists->queue); - if (p->priority != I915_PRIORITY_NORMAL) - kmem_cache_free(engine->i915->priorities, p); + i915_priolist_free(p); } done: execlists->queue_priority_hint = -- cgit v1.2.3 From b5773a3616d10bdef60662b8780ded912ee5d7b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Feb 2019 22:06:39 +0000 Subject: drm/i915/execlists: Suppress mere WAIT preemption WAIT is occasionally suppressed by virtue of preempted requests being promoted to NEWCLIENT if they have not all ready received that boost. Make this consistent for all WAIT boosts that they are not allowed to preempt executing contexts and are merely granted the right to be at the front of the queue for the next execution slot. This is in keeping with the desire that the WAIT boost be a minor tweak that does not give excessive promotion to its user and open ourselves to trivial abuse. The problem with the inconsistent WAIT preemption becomes more apparent as the preemption is propagated across the engines, where one engine may preempt and the other not, and we be relying on the exact execution order being consistent across engines (e.g. using HW semaphores to coordinate parallel execution). v2: Also protect GuC submission from false preemption loops. v3: Build bug safeguards and better debug messages for st. v4: Do the priority bumping in unsubmit (i.e. on preemption/reset unwind), applying it earlier during submit causes out-of-order execution combined with execute fences. v5: Call sw_fence_fini for our dummy request (Matthew) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Cc: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190228220639.3173-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4366db7978a8..56ba2fcbabe6 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -720,7 +720,7 @@ static inline int rq_prio(const struct i915_request *rq) static inline int port_prio(const struct execlist_port *port) { - return rq_prio(port_request(port)); + return rq_prio(port_request(port)) | __NO_PREEMPTION; } static bool __guc_dequeue(struct intel_engine_cs *engine) -- cgit v1.2.3 From 8a68d464366efb5b294fa11ccf23b51306cc2695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Mar 2019 18:03:30 +0000 Subject: drm/i915: Store the BIT(engine->id) as the engine's mask In the next patch, we are introducing a broad virtual engine to encompass multiple physical engines, losing the 1:1 nature of BIT(engine->id). To reflect the broader set of engines implied by the virtual instance, lets store the full bitmask. v2: Use intel_engine_mask_t (s/ring_mask/engine_mask/) v3: Tvrtko voted for moah churn so teach everyone to not mention ring and use $class$instance throughout. v4: Comment upon the disparity in bspec for using VCS1,VCS2 in gen8 and VCS[0-4] in later gen. We opt to keep the code consistent and use 0-index naming throughout. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190305180332.30900-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 56ba2fcbabe6..e3afc91f0e7b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -575,7 +575,7 @@ static void inject_preempt_context(struct work_struct *work) u32 *cs; cs = ce->ring->vaddr; - if (engine->id == RCS) { + if (engine->class == RENDER_CLASS) { cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED, addr, @@ -1030,7 +1030,7 @@ static int guc_clients_create(struct intel_guc *guc) GEM_BUG_ON(guc->preempt_client); client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (IS_ERR(client)) { @@ -1041,7 +1041,7 @@ static int guc_clients_create(struct intel_guc *guc) if (dev_priv->preempt_context) { client = guc_client_alloc(dev_priv, - INTEL_INFO(dev_priv)->ring_mask, + INTEL_INFO(dev_priv)->engine_mask, GUC_CLIENT_PRIORITY_KMD_HIGH, dev_priv->preempt_context); if (IS_ERR(client)) { -- cgit v1.2.3 From c4d52feb2c46ddcdde4058cf03f8b9eb996bb09b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Mar 2019 13:25:19 +0000 Subject: drm/i915: Move over to intel_context_lookup() In preparation for an ever growing number of engines and so ever increasing static array of HW contexts within the GEM context, move the array over to an rbtree, allocated upon first use. Unfortunately, this imposes an rbtree lookup at a few frequent callsites, but we should be able to mitigate those by moving over to using the HW context as our primary type and so only incur the lookup on the boundary with the user GEM context and engines. v2: Check for no HW context in guc_stage_desc_init Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190308132522.21573-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index e3afc91f0e7b..4a5727233419 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -382,7 +382,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = to_intel_context(ctx, engine); + struct intel_context *ce = intel_context_lookup(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -393,7 +393,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client) * for now who owns a GuC client. But for future owner of GuC * client, need to make sure lrc is pinned prior to enter here. */ - if (!ce->state) + if (!ce || !ce->state) break; /* XXX: continue? */ /* @@ -567,7 +567,7 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_context *ce = to_intel_context(client->owner, engine); + struct intel_context *ce = intel_context_lookup(client->owner, engine); u32 data[7]; if (!ce->ring->emit) { /* recreate upon load/resume */ -- cgit v1.2.3 From 54939ea0bd85e128bdd5bca579508dd4701c5ce9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Mar 2019 09:51:51 +0000 Subject: drm/i915: Switch to use HWS indices rather than addresses If we use the STORE_DATA_INDEX function we can use a fixed offset and avoid having to lookup up the engine HWS address. A step closer to being able to emit the final breadcrumb during request_add rather than later in the submission interrupt handler. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190318095204.9913-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4a5727233419..c4ad73980988 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -583,7 +583,8 @@ static void inject_preempt_context(struct work_struct *work) } else { cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED, - addr); + addr, + 0); *cs++ = MI_NOOP; *cs++ = MI_NOOP; } -- cgit v1.2.3 From 4c6ce5c99084411391ba892a821f61cf42c79157 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Mar 2019 15:49:12 +0000 Subject: drm/i915: Move the decision to use the breadcrumb tasklet to the backend Use the engine->flags to store whether we want to kick the submission tasklet on receipt of a breadcrumb interrupt, so that this decision can be made by the submission backend and not dependent on a limited feature test within the interrupt handler. This should make it easier to adapt to different submission backends. Signed-off-by: Chris Wilson Cc: Michal Wajdeczko Cc: Xiaolin Zhang Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190329154912.13781-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_guc_submission.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index c4ad73980988..c58922226d47 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -1262,10 +1262,12 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv) static void guc_submission_park(struct intel_engine_cs *engine) { intel_engine_unpin_breadcrumbs_irq(engine); + engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; } static void guc_submission_unpark(struct intel_engine_cs *engine) { + engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; intel_engine_pin_breadcrumbs_irq(engine); } -- cgit v1.2.3 From bfd04533138427846ab711c68320c71c6275a0a3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Apr 2019 10:17:01 +0100 Subject: drm/i915/guc: Replace WARN with a DRM_ERROR Replace the WARN with a simple if() + error message to squech the sparse warning that entire wait_for() macro was being stringified: drivers/gpu/drm/i915/intel_guc_submission.c:658:9: error: too long token expansion Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190408091728.20207-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index c58922226d47..42fcd622d7a3 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -650,9 +650,10 @@ static void wait_for_guc_preempt_report(struct intel_engine_cs *engine) struct guc_ctx_report *report = &data->preempt_ctx_report[engine->guc_id]; - WARN_ON(wait_for_atomic(report->report_return_status == - INTEL_GUC_REPORT_STATUS_COMPLETE, - GUC_PREEMPT_POSTPROCESS_DELAY_MS)); + if (wait_for_atomic(report->report_return_status == + INTEL_GUC_REPORT_STATUS_COMPLETE, + GUC_PREEMPT_POSTPROCESS_DELAY_MS)) + DRM_ERROR("Timed out waiting for GuC preemption report\n"); /* * GuC is expecting that we're also going to clear the affected context * counter, let's also reset the return status to not depend on GuC -- cgit v1.2.3 From 174221e8491588aa9d44ebfa3242bea11eacc64f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 8 Apr 2019 10:17:14 +0100 Subject: drm/i915/guc: Replace preempt_client lookup with engine->preempt_context Circumvent the dance we currently perform to find the preempt_client and lookup its HW context for this engine, as we know we have already pinned the preempt_context on the engine. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190408091728.20207-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 42fcd622d7a3..dea87253d141 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -567,7 +567,7 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - struct intel_context *ce = intel_context_lookup(client->owner, engine); + struct intel_context *ce = engine->preempt_context; u32 data[7]; if (!ce->ring->emit) { /* recreate upon load/resume */ -- cgit v1.2.3 From 292ad25c22d96583b76ce714b7a06941d54bd939 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Apr 2019 14:05:14 +0100 Subject: drm/i915/guc: Implement reset locally Before causing guc and execlists to diverge further (breaking guc in the process), take a copy of the current reset procedure and make it local to the guc submission backend Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190411130515.20716-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_guc_submission.c | 102 ++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_guc_submission.c') diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index dea87253d141..37f60cb8e9e1 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -872,6 +872,104 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) flush_workqueue(engine->i915->guc.preempt_wq); } +static void guc_reset(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq; + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); + + execlists_cancel_port_requests(execlists); + + /* Push back any incomplete requests for replay after the reset. */ + rq = execlists_unwind_incomplete_requests(execlists); + if (!rq) + goto out_unlock; + + if (!i915_request_started(rq)) + stalled = false; + + i915_reset_request(rq, stalled); + intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled); + +out_unlock: + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void guc_cancel_requests(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq, *rn; + struct rb_node *rb; + unsigned long flags; + + GEM_TRACE("%s\n", engine->name); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + spin_lock_irqsave(&engine->timeline.lock, flags); + + /* Cancel the requests on the HW and clear the ELSP tracker. */ + execlists_cancel_port_requests(execlists); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (!i915_request_signaled(rq)) + dma_fence_set_error(&rq->fence, -EIO); + + i915_request_mark_complete(rq); + } + + /* Flush the queued requests to the timeline list (for retiring). */ + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + list_del_init(&rq->sched.link); + __i915_request_submit(rq); + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; + GEM_BUG_ON(port_isset(execlists->port)); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + +static void guc_reset_finish(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (__tasklet_enable(&execlists->tasklet)) + /* And kick in case we missed a new request submission. */ + tasklet_hi_schedule(&execlists->tasklet); + + GEM_TRACE("%s: depth->%d\n", engine->name, + atomic_read(&execlists->tasklet.count)); +} + /* * Everything below here is concerned with setup & teardown, and is * therefore not part of the somewhat time-critical batch-submission @@ -1293,6 +1391,10 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) engine->unpark = guc_submission_unpark; engine->reset.prepare = guc_reset_prepare; + engine->reset.reset = guc_reset; + engine->reset.finish = guc_reset_finish; + + engine->cancel_requests = guc_cancel_requests; engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; } -- cgit v1.2.3