From cffa8e83df9fe525afad1e1099097413f9174f57 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 12 Sep 2024 17:45:07 -0400 Subject: drm/xe: Restore pci state upon resume The pci state was saved, but not restored. Restore right after the power state transition request like every other driver. v2: Use right fixes tag, since this was there initially, but accidentally removed. Fixes: f6761c68c0ac ("drm/xe/display: Improve s2idle handling.") Cc: Maarten Lankhorst Cc: Lucas De Marchi Reviewed-by: Jonathan Cavitt Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240912214507.456897-1-rodrigo.vivi@intel.com Signed-off-by: Maarten Lankhorst (cherry picked from commit ec2d1539e159f53eae708e194c449cfefa004994) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 937c3e064f0d..5e962e72c97e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -924,6 +924,8 @@ static int xe_pci_resume(struct device *dev) if (err) return err; + pci_restore_state(pdev); + err = pci_enable_device(pdev); if (err) return err; -- cgit v1.2.3-58-ga151 From 790533e44bfc7af929842fccd9674c9f424d4627 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 24 Sep 2024 16:09:48 +0100 Subject: drm/xe/guc_submit: add missing locking in wedged_fini Any non-wedged queue can have a zero refcount here and can be running concurrently with an async queue destroy, therefore dereferencing the queue ptr to check wedge status after the lookup can trigger UAF if queue is not wedged. Fix this by keeping the submission_state lock held around the check to postpone the free and make the check safe, before dropping again around the put() to avoid the deadlock. 
Fixes: 8ed9aaae39f3 ("drm/xe: Force wedged state and block GT reset upon any GPU hang") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240924150947.118433-2-matthew.auld@intel.com (cherry picked from commit d28af0b6b9580b9f90c265a7da0315b0ad20bbfd) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index fbbe6a487bbb..715c761dc7d6 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -290,9 +290,15 @@ static void guc_submit_wedged_fini(void *arg) struct xe_exec_queue *q; unsigned long index; - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) + mutex_lock(&guc->submission_state.lock); + xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { + if (exec_queue_wedged(q)) { + mutex_unlock(&guc->submission_state.lock); xe_exec_queue_put(q); + mutex_lock(&guc->submission_state.lock); + } + } + mutex_unlock(&guc->submission_state.lock); } static const struct xe_exec_queue_ops guc_exec_queue_ops; -- cgit v1.2.3-58-ga151 From 2d2be279f1ca9e7288282d4214f16eea8a727cdb Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 23 Sep 2024 15:56:48 +0100 Subject: drm/xe: fix UAF around queue destruction We currently do stuff like queuing the final destruction step on a random system wq, which will outlive the driver instance. With bad timing we can tear down the driver with one or more work items still alive on that workqueue, leading to various UAF splats. Add a fini step to ensure user queues are properly torn down. At this point GuC should already be nuked so queue itself should no longer be referenced from hw pov. 
v2 (Matt B) - Looks much safer to use a waitqueue and then just wait for the xa_array to become empty before triggering the drain. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2317 Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240923145647.77707-2-matthew.auld@intel.com (cherry picked from commit 861108666cc0e999cffeab6aff17b662e68774e3) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 6 +++++- drivers/gpu/drm/xe/xe_device_types.h | 3 +++ drivers/gpu/drm/xe/xe_guc_submit.c | 26 +++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_guc_types.h | 2 ++ 4 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 70d4e4d46c3c..74e593caf87c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -298,6 +298,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy) if (xe->unordered_wq) destroy_workqueue(xe->unordered_wq); + if (xe->destroy_wq) + destroy_workqueue(xe->destroy_wq); + ttm_device_fini(&xe->ttm); } @@ -363,8 +366,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0); xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0); xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0); + xe->destroy_wq = alloc_workqueue("xe-destroy-wq", 0, 0); if (!xe->ordered_wq || !xe->unordered_wq || - !xe->preempt_fence_wq) { + !xe->preempt_fence_wq || !xe->destroy_wq) { /* * Cleanup done in xe_device_destroy via * drmm_add_action_or_reset register above diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index ec7eb7811126..24c8c2d20676 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ 
b/drivers/gpu/drm/xe/xe_device_types.h @@ -396,6 +396,9 @@ struct xe_device { /** @unordered_wq: used to serialize unordered work, mostly display */ struct workqueue_struct *unordered_wq; + /** @destroy_wq: used to serialize user destroy work, like queue */ + struct workqueue_struct *destroy_wq; + /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 715c761dc7d6..98a6a385a796 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -276,10 +276,26 @@ static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) } #endif +static void xe_guc_submit_fini(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + ret = wait_event_timeout(guc->submission_state.fini_wq, + xa_empty(&guc->submission_state.exec_queue_lookup), + HZ * 5); + + drain_workqueue(xe->destroy_wq); + + xe_gt_assert(gt, ret); +} + static void guc_submit_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + xe_guc_submit_fini(guc); xa_destroy(&guc->submission_state.exec_queue_lookup); free_submit_wq(guc); } @@ -351,6 +367,8 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) xa_init(&guc->submission_state.exec_queue_lookup); + init_waitqueue_head(&guc->submission_state.fini_wq); + primelockdep(guc); return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); @@ -367,6 +385,9 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa xe_guc_id_mgr_release_locked(&guc->submission_state.idm, q->guc->id, q->width); + + if (xa_empty(&guc->submission_state.exec_queue_lookup)) + wake_up(&guc->submission_state.fini_wq); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -1274,13 +1295,16 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) static void guc_exec_queue_fini_async(struct xe_exec_queue *q) { + 
struct xe_guc *guc = exec_queue_to_guc(q); + struct xe_device *xe = guc_to_xe(guc); + INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); /* We must block on kernel engines so slabs are empty on driver unload */ if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) __guc_exec_queue_fini_async(&q->guc->fini_async); else - queue_work(system_wq, &q->guc->fini_async); + queue_work(xe->destroy_wq, &q->guc->fini_async); } static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index 546ac6350a31..69046f698271 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -81,6 +81,8 @@ struct xe_guc { #endif /** @submission_state.enabled: submission is enabled */ bool enabled; + /** @submission_state.fini_wq: submit fini wait queue */ + wait_queue_head_t fini_wq; } submission_state; /** @hwconfig: Hardware config state */ struct { -- cgit v1.2.3-58-ga151 From cb58977016d1b25781743e5fbe6a545493785e37 Mon Sep 17 00:00:00 2001 From: He Lugang Date: Wed, 11 Sep 2024 18:22:15 +0800 Subject: drm/xe: use devm_add_action_or_reset() helper Use devm_add_action_or_reset() to release resources in case of failure, because the cleanup function will be automatically called. 
Reviewed-by: Rodrigo Vivi Signed-off-by: He Lugang Link: https://patchwork.freedesktop.org/patch/msgid/9631BC17D1E028A2+20240911102215.84865-1-helugang@uniontech.com Signed-off-by: Rodrigo Vivi (cherry picked from commit fdc81c43f0c14ace6383024a02585e3fcbd1ceba) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_freq.c | 4 ++-- drivers/gpu/drm/xe/xe_gt_sysfs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 68a5778b4319..ab76973f3e1e 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -237,11 +237,11 @@ int xe_gt_freq_init(struct xe_gt *gt) if (!gt->freq) return -ENOMEM; - err = devm_add_action(xe->drm.dev, freq_fini, gt->freq); + err = sysfs_create_files(gt->freq, freq_attrs); if (err) return err; - err = sysfs_create_files(gt->freq, freq_attrs); + err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index a05c3699e8b9..ec2b8246204b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -51,5 +51,5 @@ int xe_gt_sysfs_init(struct xe_gt *gt) gt->sysfs = &kg->base; - return devm_add_action(xe->drm.dev, gt_sysfs_fini, gt); + return devm_add_action_or_reset(xe->drm.dev, gt_sysfs_fini, gt); } -- cgit v1.2.3-58-ga151 From d1ef967126e295d36201e79ec64efdba31710353 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 17 Sep 2024 22:44:36 -0700 Subject: drm/xe: Convert to USM lock to rwsem Remove contention from GPU fault path for ASID->VM lookup. 
Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240918054436.1971839-1-matthew.brost@intel.com (cherry picked from commit 1378c633a3fbfeb344c486ffda0e920a21e62712) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 4 +--- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_gt_pagefault.c | 8 ++++---- drivers/gpu/drm/xe/xe_vm.c | 8 ++++---- 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 74e593caf87c..fb7ac06aeef8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -339,9 +339,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, init_waitqueue_head(&xe->ufence_wq); - err = drmm_mutex_init(&xe->drm, &xe->usm.lock); - if (err) - goto err; + init_rwsem(&xe->usm.lock); xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 24c8c2d20676..00e370dcf4a9 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -369,7 +369,7 @@ struct xe_device { /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; /** @usm.lock: protects UM state */ - struct mutex lock; + struct rw_semaphore lock; } usm; /** @pinned: pinned BO state */ diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 00af059a8971..5c3af2bb5402 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -198,13 +198,13 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) return -EFAULT; /* ASID to VM */ - mutex_lock(&xe->usm.lock); + down_read(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, pf->asid); if (vm && xe_vm_in_fault_mode(vm)) xe_vm_get(vm); else vm = NULL; - mutex_unlock(&xe->usm.lock); + 
up_read(&xe->usm.lock); if (!vm) return -EINVAL; @@ -549,11 +549,11 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) return -EINVAL; /* ASID to VM */ - mutex_lock(&xe->usm.lock); + down_read(&xe->usm.lock); vm = xa_load(&xe->usm.asid_to_vm, acc->asid); if (vm) xe_vm_get(vm); - mutex_unlock(&xe->usm.lock); + up_read(&xe->usm.lock); if (!vm || !xe_vm_in_fault_mode(vm)) return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7acd5fc9d032..a3d7cb7cfd22 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1613,7 +1613,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); if (vm->usm.asid) { void *lookup; @@ -1623,7 +1623,7 @@ void xe_vm_close_and_put(struct xe_vm *vm) lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); xe_assert(xe, lookup == vm); } - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); @@ -1772,11 +1772,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, goto err_close_and_put; if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); + down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, XA_LIMIT(1, XE_MAX_ASID - 1), &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); + up_write(&xe->usm.lock); if (err < 0) goto err_free_id; -- cgit v1.2.3-58-ga151 From 0f18ac78aa974660a948dafcc45f4dc6e2c5858d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 18 Sep 2024 09:05:03 -0700 Subject: drm/xe: Use helper for ASID -> VM in GPU faults and access counters Normalize both code paths with a helper. Fixes a possible leak access counter path too. 
Suggested-by: Matthew Auld Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240918160503.2021315-1-matthew.brost@intel.com (cherry picked from commit dc0dce6d63d22e8319e27b6a41be7368376f9471) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 39 +++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 5c3af2bb5402..79c426dc2505 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -185,6 +185,21 @@ unlock_dma_resv: return err; } +static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) +{ + struct xe_vm *vm; + + down_read(&xe->usm.lock); + vm = xa_load(&xe->usm.asid_to_vm, asid); + if (vm && xe_vm_in_fault_mode(vm)) + xe_vm_get(vm); + else + vm = ERR_PTR(-EINVAL); + up_read(&xe->usm.lock); + + return vm; +} + static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) { struct xe_device *xe = gt_to_xe(gt); @@ -197,16 +212,9 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) if (pf->trva_fault) return -EFAULT; - /* ASID to VM */ - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, pf->asid); - if (vm && xe_vm_in_fault_mode(vm)) - xe_vm_get(vm); - else - vm = NULL; - up_read(&xe->usm.lock); - if (!vm) - return -EINVAL; + vm = asid_to_vm(xe, pf->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); /* * TODO: Change to read lock? Using write lock for simplicity. 
@@ -548,14 +556,9 @@ static int handle_acc(struct xe_gt *gt, struct acc *acc) if (acc->access_type != ACC_TRIGGER) return -EINVAL; - /* ASID to VM */ - down_read(&xe->usm.lock); - vm = xa_load(&xe->usm.asid_to_vm, acc->asid); - if (vm) - xe_vm_get(vm); - up_read(&xe->usm.lock); - if (!vm || !xe_vm_in_fault_mode(vm)) - return -EINVAL; + vm = asid_to_vm(xe, acc->asid); + if (IS_ERR(vm)) + return PTR_ERR(vm); down_read(&vm->lock); -- cgit v1.2.3-58-ga151 From 7929ffce0f8b9c76cb5c2a67d1966beaed20ab61 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:15 -0300 Subject: drm/xe/mcr: Use Xe2_LPM steering tables for Xe2_HPM According to Bspec, Xe2 steering tables must be used for Xe2_HPM, just as it is with Xe2_LPM. Update our driver to reflect that. Bspec: 71186 Reviewed-by: Matt Roper Signed-off-by: Gustavo Sousa Reviewed-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-2-gustavo.sousa@intel.com (cherry picked from commit 21ae035ae5c33ef176f4062bd9d4aa973dde240b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_mcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 7d7bd0be6233..c834f64b0178 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -439,7 +439,7 @@ void xe_gt_mcr_init(struct xe_gt *gt) if (gt->info.type == XE_GT_TYPE_MEDIA) { drm_WARN_ON(&xe->drm, MEDIA_VER(xe) < 13); - if (MEDIA_VER(xe) >= 20) { + if (MEDIA_VERx100(xe) >= 1301) { gt->steering[OADDRM].ranges = xe2lpm_gpmxmt_steering_table; gt->steering[INSTANCE0].ranges = xe2lpm_instance0_steering_table; } else { -- cgit v1.2.3-58-ga151 From 3bf90935aafc750c838c8831e96c3ac36cfd48d5 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:16 -0300 Subject: drm/xe/xe2: Extend performance tuning to media GT With exception of "Tuning: L3 cache - media", we 
are currently applying recommended performance tuning settings only for the primary GT. Let's also implement them for the media GT when applicable. According to our spec, media GT registers CCCHKNREG1 and L3SQCREG* exist only in Xe2_LPM and their offsets do not match their primary GT counterparts. Furthermore, the range where CCCHKNREG1 belongs is not listed as a multicast range on the media GT. As such, we need to have Xe2_LPM-specific definitions for those registers and apply the setting only for that specific IP. Both Xe2_HPM and Xe2_LPM contain STATELESS_COMPRESSION_CTRL and the offset on the media GT matches the one on the primary one. So we can simply have a copy of "Tuning: Stateless compression control" for the media GT. v2: - Fix implementation with respect to multicast vs non-multicast registers. (Matt) - Add missing XE2LPM_CCCHKNREG1 on second action of "Tuning: Compression Overfetch - media". v3: - STATELESS_COMPRESSION_CTRL on Xe2_HPM is also a multicast register, do not define a XE2HPM_STATELESS_COMPRESSION_CTRL register. 
(Tejas) Bspec: 72161 Cc: Matt Roper Reviewed-by: Tejas Upadhyay Signed-off-by: Gustavo Sousa Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-3-gustavo.sousa@intel.com (cherry picked from commit e1f813947ccf2326cfda4558b7d31430d7860c4b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 6 ++++++ drivers/gpu/drm/xe/xe_tuning.c | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 660ff42e45a6..5a1d4639e916 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -169,6 +169,8 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) + #define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) @@ -391,6 +393,10 @@ #define SCRATCH1LPFC XE_REG(0xb474) #define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) +#define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) + +#define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index faa1bf42e50e..c798ae1b3f75 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -42,20 +42,40 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX), SET(CCCHKNREG1, L3CMPCTRL)) }, + { XE_RTP_NAME("Tuning: Compression Overfetch - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(CLR(XE2LPM_CCCHKNREG1, ENCOMPPERFFIX), + SET(XE2LPM_CCCHKNREG1, L3CMPCTRL)) + }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, 
COMPPWOVERFETCHEN)) }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3 - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN)) + }, { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG2, COMPMEMRD256BOVRFETCHEN)) }, + { XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2, + COMPMEMRD256BOVRFETCHEN)) + }, { XE_RTP_NAME("Tuning: Stateless compression control"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: Stateless compression control - media"), + XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 2000)), + XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, + REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) + }, + {} }; -- cgit v1.2.3-58-ga151 From 6ef5a04221aaeb858d1a825b2ecb7e200cac80f8 Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 20 Sep 2024 18:13:18 -0300 Subject: drm/xe/xe2: Add performance tuning for L3 cache flushing A recommended performance tuning for LNL related to L3 cache flushing was recently introduced in Bspec. Implement it. Unlike the other existing tuning settings, we limit this one for LNL only, since there is no info about whether this would be applicable to other platforms yet. In the future we can come back and use IP version ranges if applicable. v2: - Fix reference to Bspec. (Sai Teja, Tejas) - Use correct register name for "Tuning: L3 RW flush all Cache". (Sai Teja) - Use SCRATCH3_LBCF (with the underscore) for better readability. v3: - Limit setting to LNL only. 
(Matt) Bspec: 72161 Cc: Sai Teja Pottumuttu Cc: Tejas Upadhyay Cc: Matt Roper Signed-off-by: Gustavo Sousa Reviewed-by: Matt Roper Reviewed-by: Tejas Upadhyay Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240920211459.255181-5-gustavo.sousa@intel.com (cherry picked from commit 876253165f3eaaacacb8c8bed16a9df4b6081479) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_tuning.c | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 5a1d4639e916..ac9c437e103d 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -380,6 +380,9 @@ #define L3SQCREG3 XE_REG_MCR(0xb108) #define COMPPWOVERFETCHEN REG_BIT(28) +#define SCRATCH3_LBCF XE_REG_MCR(0xb154) +#define RWFLUSHALLEN REG_BIT(17) + #define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) #define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) @@ -397,6 +400,8 @@ #define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) +#define XE2LPM_SCRATCH3_LBCF XE_REG_MCR(0xb654) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index c798ae1b3f75..0d5e04158917 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -75,6 +75,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(STATELESS_COMPRESSION_CTRL, UNIFIED_COMPRESSION_FORMAT, REG_FIELD_PREP(UNIFIED_COMPRESSION_FORMAT, 0))) }, + { XE_RTP_NAME("Tuning: L3 RW flush all Cache"), + XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_ACTIONS(SET(SCRATCH3_LBCF, RWFLUSHALLEN)) + }, + { XE_RTP_NAME("Tuning: L3 RW flush all cache - media"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) + }, {} }; -- cgit v1.2.3-58-ga151 From 1b30f87e088b499eb74298db256da5c98e8276e2 Mon Sep 
17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 16:59:19 -0700 Subject: drm/xe: Resume TDR after GT reset Not starting the TDR after GT reset on exec queue which have been restarted can lead to jobs being able to be run forever. Fix this by restarting the TDR. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240724235919.1917216-1-matthew.brost@intel.com (cherry picked from commit 8ec5a4e5ce97d6ee9f5eb5b4ce4cfc831976fdec) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gpu_scheduler.c | 5 +++++ drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 ++ drivers/gpu/drm/xe/xe_guc_submit.c | 1 + 3 files changed, 8 insertions(+) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c index c518d1d16d82..50361b4638f9 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c @@ -90,6 +90,11 @@ void xe_sched_submission_stop(struct xe_gpu_scheduler *sched) cancel_work_sync(&sched->work_process_msg); } +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched) +{ + drm_sched_resume_timeout(&sched->base, sched->base.timeout); +} + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg) { diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index cee9c6809fc0..5ad5629a6c60 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -22,6 +22,8 @@ void xe_sched_fini(struct xe_gpu_scheduler *sched); void xe_sched_submission_start(struct xe_gpu_scheduler *sched); void xe_sched_submission_stop(struct xe_gpu_scheduler *sched); +void xe_sched_submission_resume_tdr(struct xe_gpu_scheduler *sched); + void xe_sched_add_msg(struct xe_gpu_scheduler *sched, struct xe_sched_msg *msg); void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched, diff 
--git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 98a6a385a796..80062e1d3f66 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1826,6 +1826,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q) } xe_sched_submission_start(sched); + xe_sched_submission_resume_tdr(sched); } int xe_guc_submit_start(struct xe_guc *guc) -- cgit v1.2.3-58-ga151 From 9e3c85ddea7a473ed57b6cdfef2dfd468356fc91 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 20 Sep 2024 18:17:12 -0700 Subject: drm/xe: Clean up VM / exec queue file lock usage. Both the VM and exec queue file locks protect the lookup and reference to the object, nothing more. These locks are not intended to have anything else taken underneath them. XAs have their own locking too, so there is no need to take the VM / exec queue file lock aside from when doing a lookup and reference get. Add some kernel doc to make this clear and clean up a few typos too. Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240921011712.2681510-1-matthew.brost@intel.com (cherry picked from commit fe4f5d4b661666a45b48fe7f95443f8fefc09c8c) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 -- drivers/gpu/drm/xe/xe_device_types.h | 14 +++++++++++--- drivers/gpu/drm/xe/xe_drm_client.c | 9 ++++++++- drivers/gpu/drm/xe/xe_exec_queue.c | 2 -- drivers/gpu/drm/xe/xe_vm.c | 4 ---- 5 files changed, 19 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index fb7ac06aeef8..5a63d135ba96 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -171,10 +171,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); - mutex_unlock(&xef->vm.lock); xe_file_put(xef); 
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 00e370dcf4a9..09d731a9125c 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -570,15 +570,23 @@ struct xe_file { struct { /** @vm.xe: xarray to store VMs */ struct xarray xa; - /** @vm.lock: protects file VM state */ + /** + * @vm.lock: Protects VM lookup + reference and removal from the + * file xarray. Not intended to be an outer lock which does + * things while being held. + */ struct mutex lock; } vm; /** @exec_queue: Submission exec queue state for file */ struct { - /** @exec_queue.xe: xarray to store engines */ + /** @exec_queue.xa: xarray to store exec queues */ struct xarray xa; - /** @exec_queue.lock: protects file engine state */ + /** + * @exec_queue.lock: Protects exec queue lookup + reference and + * removal from the file xarray. Not intended to be an outer + * lock which does things while being held. + */ struct mutex lock; } exec_queue; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index c4add8b38bbd..fb52a23e28f8 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -283,8 +283,15 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) + xa_for_each(&xef->exec_queue.xa, i, q) { + xe_exec_queue_get(q); + mutex_unlock(&xef->exec_queue.lock); + xe_exec_queue_update_run_ticks(q); + + mutex_lock(&xef->exec_queue.lock); + xe_exec_queue_put(q); + } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7f28b7fc68d5..7743ebdcbf4b 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -635,9 +635,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, 
void *data, } } - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); if (err) goto kill_exec_queue; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a3d7cb7cfd22..31fe31db3fdc 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1765,9 +1765,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - mutex_lock(&xef->vm.lock); err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); if (err) goto err_close_and_put; @@ -1799,9 +1797,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, return 0; err_free_id: - mutex_lock(&xef->vm.lock); xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); err_close_and_put: xe_vm_close_and_put(vm); -- cgit v1.2.3-58-ga151 From 74231870cf4976f69e83aa24f48edb16619f652f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Sep 2024 08:14:27 +0100 Subject: drm/xe/vm: move xa_alloc to prevent UAF Evil user can guess the next id of the vm before the ioctl completes and then call vm destroy ioctl to trigger UAF since create ioctl is still referencing the same vm. Move the xa_alloc all the way to the end to prevent this. 
v2: - Rebase Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Matthew Brost Cc: # v6.8+ Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240925071426.144015-3-matthew.auld@intel.com (cherry picked from commit dcfd3971327f3ee92765154baebbaece833d3ca9) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 31fe31db3fdc..ce9dca4d4e87 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1765,10 +1765,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, if (IS_ERR(vm)) return PTR_ERR(vm); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - if (err) - goto err_close_and_put; - if (xe->info.has_asid) { down_write(&xe->usm.lock); err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, @@ -1776,12 +1772,11 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, &xe->usm.next_asid, GFP_KERNEL); up_write(&xe->usm.lock); if (err < 0) - goto err_free_id; + goto err_close_and_put; vm->usm.asid = asid; } - args->vm_id = id; vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ @@ -1794,10 +1789,15 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); #endif + /* user id alloc must always be last in ioctl to prevent UAF */ + err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); + if (err) + goto err_close_and_put; + + args->vm_id = id; + return 0; -err_free_id: - xa_erase(&xef->vm.xa, id); err_close_and_put: xe_vm_close_and_put(vm); -- cgit v1.2.3-58-ga151 From 67801fa67b94ebd0e4da7a77ac2d9f321b75fbe0 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 25 Sep 2024 08:14:28 +0100 Subject: drm/xe/queue: move xa_alloc to 
prevent UAF Evil user can guess the next id of the queue before the ioctl completes and then call queue destroy ioctl to trigger UAF since create ioctl is still referencing the same queue. Move the xa_alloc all the way to the end to prevent this. v2: - Rebase Fixes: 2149ded63079 ("drm/xe: Fix use after free when client stats are captured") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Nirmoy Das Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240925071426.144015-4-matthew.auld@intel.com (cherry picked from commit 16536582ddbebdbdf9e1d7af321bbba2bf955a87) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 7743ebdcbf4b..d098d2dd1b2d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -635,12 +635,14 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } + q->xef = xe_file_get(xef); + + /* user id alloc must always be last in ioctl to prevent UAF */ err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); if (err) goto kill_exec_queue; args->exec_queue_id = id; - q->xef = xe_file_get(xef); return 0; -- cgit v1.2.3-58-ga151 From 8135f1c09dd2eecee7cb637f7ec9a29e57300eb8 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 24 Sep 2024 14:37:13 -0700 Subject: drm/xe/oa: Don't reset OAC_CONTEXT_ENABLE on OA stream close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa testing on Xe2+ revealed that when OA metrics are collected for an exec_queue, after the OA stream is closed, future batch buffers submitted on that exec_queue do not complete. Not resetting OAC_CONTEXT_ENABLE on OA stream close resolves these hangs and should not have any adverse effects. 
v2: Make the change that we don't reset the bit clearer (Ashutosh) Also make the same fix for OAC as OAR (Ashutosh) Bspec: 60314 Fixes: 2f4a730fcd2d ("drm/xe/oa: Add OAR support") Fixes: 14e077f8006d ("drm/xe/oa: Add OAC support") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2821 Signed-off-by: José Roberto de Souza Signed-off-by: Ashutosh Dixit Cc: stable@vger.kernel.org Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240924213713.3497992-1-ashutosh.dixit@intel.com (cherry picked from commit 0c8650b09a365f4a31fca1d1d1e9d99c56071128) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_oa.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eae38a49ee8e..2804f14f8f29 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -709,8 +709,7 @@ static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), }, }; struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; @@ -742,10 +741,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? CTX_CTRL_RUN_ALONE : 0), + _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? 
CTX_CTRL_RUN_ALONE : 0), }, }; struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; -- cgit v1.2.3-58-ga151 From 7257d9c9a3c6cfe26c428e9b7ae21d61f2f55a79 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Fri, 27 Sep 2024 09:13:08 -0700 Subject: drm/xe: Prevent null pointer access in xe_migrate_copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_migrate_copy designed to copy content of TTM resources. When source resource is null, it will trigger a NULL pointer dereference in xe_migrate_copy. To avoid this situation, update lacks source flag to true for this case, the flag will trigger xe_migrate_clear rather than xe_migrate_copy. Issue trace: <7> [317.089847] xe 0000:00:02.0: [drm:xe_migrate_copy [xe]] Pass 14, sizes: 4194304 & 4194304 <7> [317.089945] xe 0000:00:02.0: [drm:xe_migrate_copy [xe]] Pass 15, sizes: 4194304 & 4194304 <1> [317.128055] BUG: kernel NULL pointer dereference, address: 0000000000000010 <1> [317.128064] #PF: supervisor read access in kernel mode <1> [317.128066] #PF: error_code(0x0000) - not-present page <6> [317.128069] PGD 0 P4D 0 <4> [317.128071] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI <4> [317.128074] CPU: 1 UID: 0 PID: 1440 Comm: kunit_try_catch Tainted: G U N 6.11.0-rc7-xe #1 <4> [317.128078] Tainted: [U]=USER, [N]=TEST <4> [317.128080] Hardware name: Intel Corporation Lunar Lake Client Platform/LNL-M LP5 RVP1, BIOS LNLMFWI1.R00.3221.D80.2407291239 07/29/2024 <4> [317.128082] RIP: 0010:xe_migrate_copy+0x66/0x13e0 [xe] <4> [317.128158] Code: 00 00 48 89 8d e0 fe ff ff 48 8b 40 10 4c 89 85 c8 fe ff ff 44 88 8d bd fe ff ff 65 48 8b 3c 25 28 00 00 00 48 89 7d d0 31 ff <8b> 79 10 48 89 85 a0 fe ff ff 48 8b 00 48 89 b5 d8 fe ff ff 83 ff <4> [317.128162] RSP: 0018:ffffc9000167f9f0 EFLAGS: 00010246 <4> [317.128164] RAX: ffff8881120d8028 RBX: ffff88814d070428 RCX: 0000000000000000 <4> [317.128166] RDX: ffff88813cb99c00 RSI: 0000000004000000 RDI: 0000000000000000 <4> [317.128168] RBP: 
ffffc9000167fbb8 R08: ffff88814e7b1f08 R09: 0000000000000001 <4> [317.128170] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88814e7b1f08 <4> [317.128172] R13: ffff88814e7b1f08 R14: ffff88813cb99c00 R15: 0000000000000001 <4> [317.128174] FS: 0000000000000000(0000) GS:ffff88846f280000(0000) knlGS:0000000000000000 <4> [317.128176] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [317.128178] CR2: 0000000000000010 CR3: 000000011f676004 CR4: 0000000000770ef0 <4> [317.128180] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 <4> [317.128182] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 <4> [317.128184] PKRU: 55555554 <4> [317.128185] Call Trace: <4> [317.128187] <4> [317.128189] ? show_regs+0x67/0x70 <4> [317.128194] ? __die_body+0x20/0x70 <4> [317.128196] ? __die+0x2b/0x40 <4> [317.128198] ? page_fault_oops+0x15f/0x4e0 <4> [317.128203] ? do_user_addr_fault+0x3fb/0x970 <4> [317.128205] ? lock_acquire+0xc7/0x2e0 <4> [317.128209] ? exc_page_fault+0x87/0x2b0 <4> [317.128212] ? asm_exc_page_fault+0x27/0x30 <4> [317.128216] ? xe_migrate_copy+0x66/0x13e0 [xe] <4> [317.128263] ? __lock_acquire+0xb9d/0x26f0 <4> [317.128265] ? __lock_acquire+0xb9d/0x26f0 <4> [317.128267] ? sg_free_append_table+0x20/0x80 <4> [317.128271] ? lock_acquire+0xc7/0x2e0 <4> [317.128273] ? mark_held_locks+0x4d/0x80 <4> [317.128275] ? trace_hardirqs_on+0x1e/0xd0 <4> [317.128278] ? _raw_spin_unlock_irqrestore+0x31/0x60 <4> [317.128281] ? __pm_runtime_resume+0x60/0xa0 <4> [317.128284] xe_bo_move+0x682/0xc50 [xe] <4> [317.128315] ? lock_is_held_type+0xaa/0x120 <4> [317.128318] ttm_bo_handle_move_mem+0xe5/0x1a0 [ttm] <4> [317.128324] ttm_bo_validate+0xd1/0x1a0 [ttm] <4> [317.128328] shrink_test_run_device+0x721/0xc10 [xe] <4> [317.128360] ? find_held_lock+0x31/0x90 <4> [317.128363] ? lock_release+0xd1/0x2a0 <4> [317.128365] ? 
__pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [kunit] <4> [317.128370] xe_bo_shrink_kunit+0x11/0x20 [xe] <4> [317.128397] kunit_try_run_case+0x6e/0x150 [kunit] <4> [317.128400] ? trace_hardirqs_on+0x1e/0xd0 <4> [317.128402] ? _raw_spin_unlock_irqrestore+0x31/0x60 <4> [317.128404] kunit_generic_run_threadfn_adapter+0x1e/0x40 [kunit] <4> [317.128407] kthread+0xf5/0x130 <4> [317.128410] ? __pfx_kthread+0x10/0x10 <4> [317.128412] ret_from_fork+0x39/0x60 <4> [317.128415] ? __pfx_kthread+0x10/0x10 <4> [317.128416] ret_from_fork_asm+0x1a/0x30 <4> [317.128420] Fixes: 266c85885263 ("drm/xe/xe2: Handle flat ccs move for igfx.") Signed-off-by: Zhanjun Dong Reviewed-by: Thomas Hellström Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240927161308.862323-2-zhanjun.dong@intel.com (cherry picked from commit 59a1c9c7e1d02b43b415ea92627ce095b7c79e47) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f379df3a12bf..e5f51fd23c65 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -680,8 +680,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, tt_has_data = ttm && (ttm_tt_is_populated(ttm) || (ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) : - (!mem_type_is_vram(old_mem_type) && !tt_has_data); + move_lacks_source = !old_mem || (handle_system_ccs ? 
(!bo->ccs_cleared) : + (!mem_type_is_vram(old_mem_type) && !tt_has_data)); needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) || (!ttm && ttm_bo->type == ttm_bo_type_device); -- cgit v1.2.3-58-ga151 From a6f3b2527375c786f2eff77d3ee8b805bcfe026d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 27 Sep 2024 16:22:28 -0700 Subject: drm/xe: Fix memory leak when aborting binds Make sure to call xe_pt_update_ops_fini in xe_pt_update_ops_abort to free any memory the bind allocated. Caught by kmemleak when running Vulkan CTS tests on LNL. The leak seems to happen only when there's some kind of failure happening, like the lack of memory. Example output: unreferenced object 0xffff9120bdf62000 (size 8192): comm "deqp-vk", pid 115008, jiffies 4310295728 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 1b 05 f9 28 01 00 00 40 ...........(...@ 00 00 00 00 00 00 00 00 1b 15 f9 28 01 00 00 40 ...........(...@ backtrace (crc 7a56be79): [] __kmalloc_cache_noprof+0x310/0x3d0 [] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe] [] xe_pt_insert_entry+0xb9/0x140 [xe] [] xe_pt_stage_bind_entry+0x12d/0x5b0 [xe] [] xe_pt_walk_range+0xea/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_stage_bind.constprop.0+0x25f/0x580 [xe] [] bind_op_prepare+0xea/0x6e0 [xe] [] xe_pt_update_ops_prepare+0x1c8/0x440 [xe] [] ops_execute+0x143/0x850 [xe] [] vm_bind_ioctl_ops_execute+0x244/0x800 [xe] [] xe_vm_bind_ioctl+0x1877/0x2370 [xe] [] drm_ioctl_kernel+0xb3/0x110 [drm] unreferenced object 0xffff9120bdf72000 (size 8192): comm "deqp-vk", pid 115008, jiffies 4310295728 hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk backtrace (crc 23b2f0b5): [] __kmalloc_cache_noprof+0x310/0x3d0 [] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe] [] xe_pt_stage_unbind_post_descend+0xb3/0x150 
[xe] [] xe_pt_walk_range+0x246/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_range+0x20a/0x280 [xe] [] xe_pt_walk_shared+0xc1/0x110 [xe] [] xe_pt_stage_unbind+0x9a/0xd0 [xe] [] unbind_op_prepare+0xdd/0x270 [xe] [] xe_pt_update_ops_prepare+0x106/0x440 [xe] [] ops_execute+0x143/0x850 [xe] [] vm_bind_ioctl_ops_execute+0x244/0x800 [xe] [] xe_vm_bind_ioctl+0x1877/0x2370 [xe] [] drm_ioctl_kernel+0xb3/0x110 [drm] [] drm_ioctl+0x280/0x4e0 [drm] Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2877 Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling") Signed-off-by: Matthew Brost Reviewed-by: Paulo Zanoni Link: https://patchwork.freedesktop.org/patch/msgid/20240927232228.3255246-1-matthew.brost@intel.com (cherry picked from commit 63e0695597a044c96bf369e4d8ba031291449d95) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d6353e8969f0..f27f579f4d85 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2188,5 +2188,5 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) pt_op->num_entries); } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); + xe_pt_update_ops_fini(tile, vops); } -- cgit v1.2.3-58-ga151