diff options
author | Dave Airlie <airlied@redhat.com> | 2023-01-30 14:17:06 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-01-30 14:24:52 +1000 |
commit | dae437d568bbc3d4211b88d23fa0868a5019caf6 (patch) | |
tree | ebce0628697e03ef89cc8c187b0b16c756c00730 | |
parent | 54587d9943c906dd8ec2732dca3e9fd791aa789e (diff) | |
parent | 2abdd44e3126e29cc310cbf5f1eda7ca7d979df3 (diff) |
Merge tag 'drm/tegra/for-6.3-rc1' of https://gitlab.freedesktop.org/drm/tegra into drm-next
drm/tegra: Changes for v6.3-rc1
This set of changes includes a rework of the custom syncpoint interrupt
code to take better advantage of existing DRM/KMS infrastructure.
There's also various bits of cleanup and fixes included.
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Thierry Reding <thierry.reding@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230127170119.495943-1-thierry.reding@gmail.com
26 files changed, 285 insertions, 665 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 919490949a6c..0c0ea8fb305a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7040,7 +7040,7 @@ M: Thierry Reding <thierry.reding@gmail.com> L: dri-devel@lists.freedesktop.org L: linux-tegra@vger.kernel.org S: Supported -T: git git://anongit.freedesktop.org/tegra/linux.git +T: git https://gitlab.freedesktop.org/drm/tegra.git F: Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.yaml F: Documentation/devicetree/bindings/gpu/host1x/ F: drivers/gpu/drm/tegra/ diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 7dc681e2ee90..3c84e73d5051 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -598,7 +598,6 @@ static int tegra_dpaux_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM static int tegra_dpaux_suspend(struct device *dev) { struct tegra_dpaux *dpaux = dev_get_drvdata(dev); @@ -657,10 +656,9 @@ disable_clk: clk_disable_unprepare(dpaux->clk); return err; } -#endif static const struct dev_pm_ops tegra_dpaux_pm_ops = { - SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL) + RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL) }; static const struct tegra_dpaux_soc tegra124_dpaux_soc = { @@ -694,7 +692,7 @@ struct platform_driver tegra_dpaux_driver = { .driver = { .name = "tegra-dpaux", .of_match_table = tegra_dpaux_of_match, - .pm = &tegra_dpaux_pm_ops, + .pm = pm_ptr(&tegra_dpaux_pm_ops), }, .probe = tegra_dpaux_probe, .remove = tegra_dpaux_remove, diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c index 1824d2db0e2c..d53f890fa689 100644 --- a/drivers/gpu/drm/tegra/firewall.c +++ b/drivers/gpu/drm/tegra/firewall.c @@ -97,6 +97,9 @@ static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset) { bool is_addr; + if (!fw->client->ops->is_addr_reg) + return 0; + is_addr = fw->client->ops->is_addr_reg(fw->client->base.dev, fw->class, offset); if (is_addr) diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 10fd21517281..86c5818ac27b 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -67,26 +67,18 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 value, static int nvdec_boot_falcon(struct nvdec *nvdec) { -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev); -#endif + u32 stream_id; int err; -#ifdef CONFIG_IOMMU_API - if (nvdec->config->supports_sid && spec) { + if (nvdec->config->supports_sid && tegra_dev_iommu_get_stream_id(nvdec->dev, &stream_id)) { u32 value; value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW); nvdec_writel(nvdec, value, NVDEC_TFBIF_TRANSCFG); - if (spec->num_ids > 0) { - value = spec->ids[0] & 0xffff; - - nvdec_writel(nvdec, value, VIC_THI_STREAMID0); - nvdec_writel(nvdec, value, VIC_THI_STREAMID1); - } + nvdec_writel(nvdec, stream_id, VIC_THI_STREAMID0); + nvdec_writel(nvdec, stream_id, VIC_THI_STREAMID1); } -#endif err = falcon_boot(&nvdec->falcon); if (err < 0) diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 066f88564169..2430fcc97448 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -609,21 +609,13 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, host1x_memory_context_get(job->memory_context); } } else if (context->client->ops->get_streamid_offset) { -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec; - /* * Job submission will need to temporarily change stream ID, * so need to tell it what to change it back to. */ - spec = dev_iommu_fwspec_get(context->client->base.dev); - if (spec && spec->num_ids > 0) - job->engine_fallback_streamid = spec->ids[0] & 0xffff; - else - job->engine_fallback_streamid = 0x7f; -#else - job->engine_fallback_streamid = 0x7f; -#endif + if (!tegra_dev_iommu_get_stream_id(context->client->base.dev, + &job->engine_fallback_streamid)) + job->engine_fallback_streamid = TEGRA_STREAM_ID_BYPASS; } /* Boot engine. */ @@ -654,7 +646,7 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, args->syncpt.value = job->syncpt_end; if (syncobj) { - struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end); + struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end, true); if (IS_ERR(fence)) { err = PTR_ERR(fence); SUBMIT_ERR(context, "failed to create postfence: %d", err); @@ -680,8 +672,7 @@ free_job_data: kfree(job_data->used_mappings); } - if (job_data) - kfree(job_data); + kfree(job_data); put_bo: gather_bo_put(&bo->base); unlock: diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 7382ee132eb7..531a71c72061 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -56,41 +56,30 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset) static int vic_boot(struct vic *vic) { -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); -#endif - u32 fce_ucode_size, fce_bin_data_offset; + u32 fce_ucode_size, fce_bin_data_offset, stream_id; void *hdr; int err = 0; -#ifdef CONFIG_IOMMU_API - if (vic->config->supports_sid && spec) { + if (vic->config->supports_sid && tegra_dev_iommu_get_stream_id(vic->dev, &stream_id)) { u32 value; value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW); vic_writel(vic, value, VIC_TFBIF_TRANSCFG); - if (spec->num_ids > 0) { - value = spec->ids[0] & 0xffff; - - /* - * STREAMID0 is used for input/output buffers. - * Initialize it to SID_VIC in case context isolation - * is not enabled, and SID_VIC is used for both firmware - * and data buffers. - * - * If context isolation is enabled, it will be - * overridden by the SETSTREAMID opcode as part of - * each job. - */ - vic_writel(vic, value, VIC_THI_STREAMID0); - - /* STREAMID1 is used for firmware loading. */ - vic_writel(vic, value, VIC_THI_STREAMID1); - } + /* + * STREAMID0 is used for input/output buffers. Initialize it to SID_VIC in case + * context isolation is not enabled, and SID_VIC is used for both firmware and + * data buffers. + * + * If context isolation is enabled, it will be overridden by the SETSTREAMID + * opcode as part of each job. + */ + vic_writel(vic, stream_id, VIC_THI_STREAMID0); + + /* STREAMID1 is used for firmware loading. */ + vic_writel(vic, stream_id, VIC_THI_STREAMID1); } -#endif /* setup clockgating registers */ vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) | diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 103fda055394..bc821b0ed908 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -490,6 +490,15 @@ resume: host1x_hw_cdma_resume(host1x, cdma, restart_addr); } +static void cdma_update_work(struct work_struct *work) +{ + struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); + + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} + /* * Create a cdma */ @@ -499,6 +508,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma) mutex_init(&cdma->lock); init_completion(&cdma->complete); + INIT_WORK(&cdma->update_work, cdma_update_work); INIT_LIST_HEAD(&cdma->sync_queue); @@ -679,7 +689,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma, */ void host1x_cdma_update(struct host1x_cdma *cdma) { - mutex_lock(&cdma->lock); - update_cdma_locked(cdma); - mutex_unlock(&cdma->lock); + schedule_work(&cdma->update_work); } diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 12c4327c4df0..7fd8168af4f9 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/completion.h> #include <linux/list.h> +#include <linux/workqueue.h> struct host1x_syncpt; struct host1x_userctx_timeout; @@ -69,6 +70,7 @@ struct host1x_cdma { struct buffer_timeout timeout; /* channel's timeout state/wq */ bool running; bool torndown; + struct work_struct update_work; }; #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c index c8e7994c2c9c..8beedcf080ab 100644 --- a/drivers/gpu/host1x/context.c +++ b/drivers/gpu/host1x/context.c @@ -35,8 +35,6 @@ int host1x_memory_context_list_init(struct host1x *host1x) cdl->len = err / 4; for (i = 0; i < cdl->len; i++) { - struct iommu_fwspec *fwspec; - ctx = &cdl->devs[i]; ctx->host = host1x; @@ -70,14 +68,12 @@ int host1x_memory_context_list_init(struct host1x *host1x) goto del_devices; } - fwspec = dev_iommu_fwspec_get(&ctx->dev); - if (!fwspec || !device_iommu_mapped(&ctx->dev)) { + if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) || + !device_iommu_mapped(&ctx->dev)) { dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i); device_del(&ctx->dev); goto del_devices; } - - ctx->stream_id = fwspec->ids[0] & 0xffff; } return 0; diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 6649b04b7131..a18cc8d8caf5 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -77,6 +77,7 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) static void show_syncpts(struct host1x *m, struct output *o, bool show_all) { + unsigned long irqflags; struct list_head *pos; unsigned int i; int err; @@ -92,10 +93,10 @@ static void show_syncpts(struct host1x *m, struct output *o, bool show_all) u32 min = host1x_syncpt_load(m->syncpt + i); unsigned int waiters = 0; - spin_lock(&m->syncpt[i].intr.lock); - list_for_each(pos, &m->syncpt[i].intr.wait_head) + spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags); + list_for_each(pos, &m->syncpt[i].fences.list) waiters++; - spin_unlock(&m->syncpt[i].intr.lock); + spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags); if (!kref_read(&m->syncpt[i].ref)) continue; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index f31039aca03c..4872d183d860 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -516,7 +516,7 @@ static int host1x_probe(struct platform_device *pdev) return PTR_ERR(host->regs); } - syncpt_irq = platform_get_irq(pdev, 0); + host->syncpt_irq = platform_get_irq(pdev, 0); if (syncpt_irq < 0) return syncpt_irq; @@ -578,7 +578,7 @@ static int host1x_probe(struct platform_device *pdev) goto free_contexts; } - err = host1x_intr_init(host, syncpt_irq); + err = host1x_intr_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize interrupts\n"); goto deinit_syncpt; diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 920e5548cfbc..75de50fe03d0 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -74,8 +74,7 @@ struct host1x_syncpt_ops { }; struct host1x_intr_ops { - int (*init_host_sync)(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *work)); + int (*init_host_sync)(struct host1x *host, u32 cpm); void (*set_syncpt_threshold)( struct host1x *host, unsigned int id, u32 thresh); void (*enable_syncpt_intr)(struct host1x *host, unsigned int id); @@ -125,6 +124,7 @@ struct host1x { void __iomem *regs; void __iomem *hv_regs; /* hypervisor region */ void __iomem *common_regs; + int syncpt_irq; struct host1x_syncpt *syncpt; struct host1x_syncpt_base *bases; struct device *dev; @@ -138,7 +138,6 @@ struct host1x { dma_addr_t iova_end; struct mutex intr_mutex; - int intr_syncpt_irq; const struct host1x_syncpt_ops *syncpt_op; const struct host1x_intr_ops *intr_op; @@ -216,10 +215,9 @@ static inline void host1x_hw_syncpt_enable_protection(struct host1x *host) return host->syncpt_op->enable_protection(host); } -static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *)) +static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm) { - return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work); + return host->intr_op->init_host_sync(host, cpm); } static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host, diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c index df428bcbae69..139ad1afd935 100644 --- a/drivers/gpu/host1x/fence.c +++ b/drivers/gpu/host1x/fence.c @@ -15,22 +15,6 @@ #include "intr.h" #include "syncpt.h" -static DEFINE_SPINLOCK(lock); - -struct host1x_syncpt_fence { - struct dma_fence base; - - atomic_t signaling; - - struct host1x_syncpt *sp; - u32 threshold; - - struct host1x_waitlist *waiter; - void *waiter_ref; - - struct delayed_work timeout_work; -}; - static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) { return "host1x"; @@ -49,11 +33,11 @@ static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) { struct host1x_syncpt_fence *sf = to_host1x_fence(f); - int err; if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) return false; + /* Reference for interrupt path. */ dma_fence_get(f); /* @@ -61,24 +45,17 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) * reference to any fences for which 'enable_signaling' has been * called (and that have not been signalled). * - * We provide a userspace API to create arbitrary syncpoint fences, - * so we cannot normally guarantee that all fences get signalled. - * As such, setup a timeout, so that long-lasting fences will get - * reaped eventually. + * We cannot currently always guarantee that all fences get signalled + * or cancelled. As such, for such situations, set up a timeout, so + * that long-lasting fences will get reaped eventually. */ - schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); - - err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold, - HOST1X_INTR_ACTION_SIGNAL_FENCE, f, - sf->waiter, &sf->waiter_ref); - if (err) { - cancel_delayed_work_sync(&sf->timeout_work); - dma_fence_put(f); - return false; + if (sf->timeout) { + /* Reference for timeout path. */ + dma_fence_get(f); + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); } - /* intr framework takes ownership of waiter */ - sf->waiter = NULL; + host1x_intr_add_fence_locked(sf->sp->host, sf); /* * The fence may get signalled at any time after the above call, @@ -89,37 +66,32 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) return true; } -static void host1x_syncpt_fence_release(struct dma_fence *f) -{ - struct host1x_syncpt_fence *sf = to_host1x_fence(f); - - if (sf->waiter) - kfree(sf->waiter); - - dma_fence_free(f); -} - static const struct dma_fence_ops host1x_syncpt_fence_ops = { .get_driver_name = host1x_syncpt_fence_get_driver_name, .get_timeline_name = host1x_syncpt_fence_get_timeline_name, .enable_signaling = host1x_syncpt_fence_enable_signaling, - .release = host1x_syncpt_fence_release, }; void host1x_fence_signal(struct host1x_syncpt_fence *f) { - if (atomic_xchg(&f->signaling, 1)) + if (atomic_xchg(&f->signaling, 1)) { + /* + * Already on timeout path, but we removed the fence before + * timeout path could, so drop interrupt path reference. + */ + dma_fence_put(&f->base); return; + } - /* - * Cancel pending timeout work - if it races, it will - * not get 'f->signaling' and return. - */ - cancel_delayed_work_sync(&f->timeout_work); - - host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false); + if (f->timeout && cancel_delayed_work(&f->timeout_work)) { + /* + * We know that the timeout path will not be entered. + * Safe to drop the timeout path's reference now. + */ + dma_fence_put(&f->base); + } - dma_fence_signal(&f->base); + dma_fence_signal_locked(&f->base); dma_fence_put(&f->base); } @@ -129,21 +101,29 @@ static void do_fence_timeout(struct work_struct *work) struct host1x_syncpt_fence *f = container_of(dwork, struct host1x_syncpt_fence, timeout_work); - if (atomic_xchg(&f->signaling, 1)) + if (atomic_xchg(&f->signaling, 1)) { + /* Already on interrupt path, drop timeout path reference if any. */ + if (f->timeout) + dma_fence_put(&f->base); return; + } - /* - * Cancel pending timeout work - if it races, it will - * not get 'f->signaling' and return. - */ - host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true); + if (host1x_intr_remove_fence(f->sp->host, f)) { + /* + * Managed to remove fence from queue, so it's safe to drop + * the interrupt path's reference. + */ + dma_fence_put(&f->base); + } dma_fence_set_error(&f->base, -ETIMEDOUT); dma_fence_signal(&f->base); - dma_fence_put(&f->base); + if (f->timeout) + dma_fence_put(&f->base); } -struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout) { struct host1x_syncpt_fence *fence; @@ -151,16 +131,11 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) if (!fence) return ERR_PTR(-ENOMEM); - fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL); - if (!fence->waiter) { - kfree(fence); - return ERR_PTR(-ENOMEM); - } - fence->sp = sp; fence->threshold = threshold; + fence->timeout = timeout; - dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock, + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock, dma_fence_context_alloc(1), 0); INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); @@ -168,3 +143,12 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold) return &fence->base; } EXPORT_SYMBOL(host1x_fence_create); + +void host1x_fence_cancel(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + schedule_delayed_work(&sf->timeout_work, 0); + flush_delayed_work(&sf->timeout_work); +} +EXPORT_SYMBOL(host1x_fence_cancel); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h index 70c91de82f14..f3c644c73cad 100644 --- a/drivers/gpu/host1x/fence.h +++ b/drivers/gpu/host1x/fence.h @@ -6,7 +6,24 @@ #ifndef HOST1X_FENCE_H #define HOST1X_FENCE_H -struct host1x_syncpt_fence; +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + bool timeout; + + struct delayed_work timeout_work; + + struct list_head list; +}; + +struct host1x_fence_list { + spinlock_t lock; + struct list_head list; +}; void host1x_fence_signal(struct host1x_syncpt_fence *fence); diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 732abe0750ff..d44b8de890be 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -179,14 +179,12 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { #if HOST1X_HW >= 6 - u32 sid = 0x7f; -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); - if (spec) - sid = spec->ids[0] & 0xffff; -#endif + u32 stream_id; + + if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id)) + stream_id = TEGRA_STREAM_ID_BYPASS; - host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID); + host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID); #endif } @@ -278,6 +276,14 @@ static void channel_program_cdma(struct host1x_job *job) #endif } +static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); + + /* Schedules CDMA update. */ + host1x_cdma_update(&job->channel->cdma); +} + static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; @@ -285,7 +291,6 @@ static int channel_submit(struct host1x_job *job) u32 prev_max = 0; u32 syncval; int err; - struct host1x_waitlist *completed_waiter = NULL; struct host1x *host = dev_get_drvdata(ch->dev->parent); trace_host1x_channel_submit(dev_name(ch->dev), @@ -298,14 +303,7 @@ static int channel_submit(struct host1x_job *job) /* get submit lock */ err = mutex_lock_interruptible(&ch->submitlock); if (err) - goto error; - - completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL); - if (!completed_waiter) { - mutex_unlock(&ch->submitlock); - err = -ENOMEM; - goto error; - } + return err; host1x_channel_set_streamid(ch); host1x_enable_gather_filter(ch); @@ -315,31 +313,37 @@ static int channel_submit(struct host1x_job *job) err = host1x_cdma_begin(&ch->cdma, job); if (err) { mutex_unlock(&ch->submitlock); - goto error; + return err; } channel_program_cdma(job); syncval = host1x_syncpt_read_max(sp); + /* + * Create fence before submitting job to HW to avoid job completing + * before the fence is set up. + */ + job->fence = host1x_fence_create(sp, syncval, true); + if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) { + job->fence = NULL; + } else { + err = dma_fence_add_callback(job->fence, &job->fence_cb, + job_complete_callback); + } + /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, job); trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); - /* schedule a submit complete interrupt */ - err = host1x_intr_add_action(host, sp, syncval, - HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, - completed_waiter, &job->waiter); - completed_waiter = NULL; - WARN(err, "Failed to set submit complete interrupt"); - mutex_unlock(&ch->submitlock); - return 0; + if (err == -ENOENT) + host1x_cdma_update(&ch->cdma); + else + WARN(err, "Failed to set submit complete interrupt"); -error: - kfree(completed_waiter); - return err; + return 0; } static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h index 5f831438d19b..50c32de452fb 100644 --- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) host1x_uclass_incr_syncpt_cond_f(v) static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) { - return (v & 0xff) << 0; + return (v & 0x3ff) << 0; } #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ host1x_uclass_incr_syncpt_indx_f(v) diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h index 8cd2ef087d5d..887b878f92f7 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) host1x_uclass_incr_syncpt_cond_f(v) static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) { - return (v & 0xff) << 0; + return (v & 0x3ff) << 0; } #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ host1x_uclass_incr_syncpt_indx_f(v) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h index 724cccd71aa1..4fb1d090edae 100644 --- a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h @@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) host1x_uclass_incr_syncpt_cond_f(v) static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) { - return (v & 0xff) << 0; + return (v & 0x3ff) << 0; } #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ host1x_uclass_incr_syncpt_indx_f(v) diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index 9acccdb139e6..b915ef7d0348 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -13,23 +13,6 @@ #include "../intr.h" #include "../dev.h" -/* - * Sync point threshold interrupt service function - * Handles sync point threshold triggers, in interrupt context - */ -static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt) -{ - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; - - host1x_sync_writel(host, BIT(id % 32), - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32)); - host1x_sync_writel(host, BIT(id % 32), - HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); - - schedule_work(&syncpt->intr.work); -} - static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) { struct host1x *host = dev_id; @@ -39,17 +22,20 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - for_each_set_bit(id, ®, 32) { - struct host1x_syncpt *syncpt = - host->syncpt + (i * 32 + id); - host1x_intr_syncpt_handle(syncpt); - } + + host1x_sync_writel(host, reg, + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); + host1x_sync_writel(host, reg, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + + for_each_set_bit(id, ®, 32) + host1x_intr_handle_interrupt(host, i * 32 + id); } return IRQ_HANDLED; } -static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) +static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) { unsigned int i; @@ -90,45 +76,38 @@ static void intr_hw_init(struct host1x *host, u32 cpm) } static int -_host1x_intr_init_host_sync(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *)) +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) { - unsigned int i; int err; host1x_hw_intr_disable_all_syncpt_intrs(host); - for (i = 0; i < host->info->nb_pts; i++) - INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work); - - err = devm_request_irq(host->dev, host->intr_syncpt_irq, + err = devm_request_irq(host->dev, host->syncpt_irq, syncpt_thresh_isr, IRQF_SHARED, "host1x_syncpt", host); - if (err < 0) { - WARN_ON(1); + if (err < 0) return err; - } intr_hw_init(host, cpm); return 0; } -static void _host1x_intr_set_syncpt_threshold(struct host1x *host, +static void host1x_intr_set_syncpt_threshold(struct host1x *host, unsigned int id, u32 thresh) { host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id)); } -static void _host1x_intr_enable_syncpt_intr(struct host1x *host, +static void host1x_intr_enable_syncpt_intr(struct host1x *host, unsigned int id) { host1x_sync_writel(host, BIT(id % 32), HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32)); } -static void _host1x_intr_disable_syncpt_intr(struct host1x *host, +static void host1x_intr_disable_syncpt_intr(struct host1x *host, unsigned int id) { host1x_sync_writel(host, BIT(id % 32), @@ -137,23 +116,10 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); } -static int _host1x_free_syncpt_irq(struct host1x *host) -{ - unsigned int i; - - devm_free_irq(host->dev, host->intr_syncpt_irq, host); - - for (i = 0; i < host->info->nb_pts; i++) - cancel_work_sync(&host->syncpt[i].intr.work); - - return 0; -} - static const struct host1x_intr_ops host1x_intr_ops = { - .init_host_sync = _host1x_intr_init_host_sync, - .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold, - .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr, - .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr, - .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs, - .free_syncpt_irq = _host1x_free_syncpt_irq, + .init_host_sync = host1x_intr_init_host_sync, + .set_syncpt_threshold = host1x_intr_set_syncpt_threshold, + .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, + .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, + .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, }; diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index dd39d67ccec3..8cf35b2eff3d 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp, #if HOST1X_HW >= 6 struct host1x *host = sp->host; - if (!host->hv_regs) - return; - host1x_sync_writel(host, HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff), HOST1X_SYNC_SYNCPT_CH_APP(sp->id)); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 965ba21818b1..995bfa980837 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -2,299 +2,113 @@ /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. + * Copyright (c) 2010-2021, NVIDIA Corporation. */ #include <linux/clk.h> -#include <linux/interrupt.h> -#include <linux/slab.h> -#include <linux/irq.h> -#include <trace/events/host1x.h> -#include "channel.h" #include "dev.h" #include "fence.h" #include "intr.h" -/* Wait list management */ - -enum waitlist_state { - WLS_PENDING, - WLS_REMOVED, - WLS_CANCELLED, - WLS_HANDLED -}; - -static void waiter_release(struct kref *kref) -{ - kfree(container_of(kref, struct host1x_waitlist, refcount)); -} - -/* - * add a waiter to a waiter queue, sorted by threshold - * returns true if it was added at the head of the queue - */ -static bool add_waiter_to_queue(struct host1x_waitlist *waiter, - struct list_head *queue) -{ - struct host1x_waitlist *pos; - u32 thresh = waiter->thresh; - - list_for_each_entry_reverse(pos, queue, list) - if ((s32)(pos->thresh - thresh) <= 0) { - list_add(&waiter->list, &pos->list); - return false; - } - - list_add(&waiter->list, queue); - return true; -} - -/* - * run through a waiter queue for a single sync point ID - * and gather all completed waiters into lists by actions - */ -static void remove_completed_waiters(struct list_head *head, u32 sync, - struct list_head completed[HOST1X_INTR_ACTION_COUNT]) +static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list, + struct host1x_syncpt_fence *fence) { - struct list_head *dest; - struct host1x_waitlist *waiter, *next, *prev; - - list_for_each_entry_safe(waiter, next, head, list) { - if ((s32)(waiter->thresh - sync) > 0) - break; + struct host1x_syncpt_fence *fence_in_list; - dest = completed + waiter->action; - - /* consolidate submit cleanups */ - if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE && - !list_empty(dest)) { - prev = list_entry(dest->prev, - struct host1x_waitlist, list); - if (prev->data == waiter->data) { - prev->count++; - dest = NULL; - } + list_for_each_entry_reverse(fence_in_list, &list->list, list) { + if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) { + /* Fence in list is before us, we can insert here */ + list_add(&fence->list, &fence_in_list->list); + return; } - - /* PENDING->REMOVED or CANCELLED->HANDLED */ - if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } else - list_move_tail(&waiter->list, dest); } -} - -static void reset_threshold_interrupt(struct host1x *host, - struct list_head *head, - unsigned int id) -{ - u32 thresh = - list_first_entry(head, struct host1x_waitlist, list)->thresh; - - host1x_hw_intr_set_syncpt_threshold(host, id, thresh); - host1x_hw_intr_enable_syncpt_intr(host, id); -} - -static void action_submit_complete(struct host1x_waitlist *waiter) -{ - struct host1x_channel *channel = waiter->data; - - host1x_cdma_update(&channel->cdma); - - /* Add nr_completed to trace */ - trace_host1x_channel_submit_complete(dev_name(channel->dev), - waiter->count, waiter->thresh); -} -static void action_wakeup(struct host1x_waitlist *waiter) -{ - wait_queue_head_t *wq = waiter->data; - - wake_up(wq); -} - -static void action_wakeup_interruptible(struct host1x_waitlist *waiter) -{ - wait_queue_head_t *wq = waiter->data; - - wake_up_interruptible(wq); + /* Add as first in list */ + list_add(&fence->list, &list->list); } -static void action_signal_fence(struct host1x_waitlist *waiter) +static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp) { - struct host1x_syncpt_fence *f = waiter->data; - - host1x_fence_signal(f); -} + struct host1x_syncpt_fence *fence; -typedef void (*action_handler)(struct host1x_waitlist *waiter); + if (!list_empty(&sp->fences.list)) { + fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list); -static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { - action_submit_complete, - action_wakeup, - action_wakeup_interruptible, - action_signal_fence, -}; - -static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) -{ - struct list_head *head = completed; - unsigned int i; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) { - action_handler handler = action_handlers[i]; - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, head, list) { - list_del(&waiter->list); - handler(waiter); - WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) != - WLS_REMOVED); - kref_put(&waiter->refcount, waiter_release); - } + host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold); + host1x_hw_intr_enable_syncpt_intr(host, sp->id); + } else { + host1x_hw_intr_disable_syncpt_intr(host, sp->id); } } -/* - * Remove & handle all waiters that have completed for the given syncpt - */ -static int process_wait_list(struct host1x *host, - struct host1x_syncpt *syncpt, - u32 threshold) +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence) { - struct list_head completed[HOST1X_INTR_ACTION_COUNT]; - unsigned int i; - int empty; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i) - INIT_LIST_HEAD(completed + i); - - spin_lock(&syncpt->intr.lock); - - remove_completed_waiters(&syncpt->intr.wait_head, threshold, - completed); - - empty = list_empty(&syncpt->intr.wait_head); - if (empty) - host1x_hw_intr_disable_syncpt_intr(host, syncpt->id); - else - reset_threshold_interrupt(host, &syncpt->intr.wait_head, - syncpt->id); - - spin_unlock(&syncpt->intr.lock); - - run_handlers(completed); - - return empty; -} + struct host1x_fence_list *fence_list = &fence->sp->fences; -/* - * Sync point threshold interrupt service thread function - * Handles sync point threshold triggers, in thread context - */ + INIT_LIST_HEAD(&fence->list); -static void syncpt_thresh_work(struct work_struct *work) -{ - struct host1x_syncpt_intr *syncpt_intr = - container_of(work, struct host1x_syncpt_intr, work); - struct host1x_syncpt *syncpt = - container_of(syncpt_intr, struct host1x_syncpt, intr); - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; - - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + host1x_intr_add_fence_to_list(fence_list, fence); + host1x_intr_update_hw_state(host, fence->sp); } -int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, - u32 thresh, enum host1x_intr_action action, - void *data, struct host1x_waitlist *waiter, - void **ref) +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence) { - int queue_was_empty; - - if (waiter == NULL) { - pr_warn("%s: NULL waiter\n", __func__); - return -EINVAL; - } - - /* initialize a new waiter */ - INIT_LIST_HEAD(&waiter->list); - kref_init(&waiter->refcount); - if (ref) - kref_get(&waiter->refcount); - waiter->thresh = thresh; - waiter->action = action; - atomic_set(&waiter->state, WLS_PENDING); - waiter->data = data; - waiter->count = 1; - - spin_lock(&syncpt->intr.lock); + struct host1x_fence_list *fence_list = &fence->sp->fences; + unsigned long irqflags; - queue_was_empty = list_empty(&syncpt->intr.wait_head); + spin_lock_irqsave(&fence_list->lock, irqflags); - if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) { - /* added at head of list - new threshold value */ - host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh); - - /* added as first waiter - enable interrupt */ - if (queue_was_empty) - host1x_hw_intr_enable_syncpt_intr(host, syncpt->id); + if (list_empty(&fence->list)) { + spin_unlock_irqrestore(&fence_list->lock, irqflags); + return false; } - if (ref) - *ref = waiter; + list_del_init(&fence->list); + host1x_intr_update_hw_state(host, fence->sp); - spin_unlock(&syncpt->intr.lock); + spin_unlock_irqrestore(&fence_list->lock, irqflags); - return 0; + return true; } -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, - bool flush) +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) { - struct host1x_waitlist *waiter = ref; - struct host1x_syncpt *syncpt; + struct host1x_syncpt *sp = &host->syncpt[id]; + struct host1x_syncpt_fence *fence, *tmp; + unsigned int value; - atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED); + value = host1x_syncpt_load(sp); - syncpt = host->syncpt + id; + spin_lock(&sp->fences.lock); - spin_lock(&syncpt->intr.lock); - if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) == - WLS_CANCELLED) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } - spin_unlock(&syncpt->intr.lock); + list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) { + if (((value - fence->threshold) & 0x80000000U) != 0U) { + /* Fence is not yet expired, we are done */ + break; + } - if (flush) { - /* Wait until any concurrently executing handler has finished. */ - while (atomic_read(&waiter->state) != WLS_HANDLED) - schedule(); + list_del_init(&fence->list); + host1x_fence_signal(fence); } - kref_put(&waiter->refcount, waiter_release); + /* Re-enable interrupt if necessary */ + host1x_intr_update_hw_state(host, sp); + + spin_unlock(&sp->fences.lock); } -int host1x_intr_init(struct host1x *host, unsigned int irq_sync) +int host1x_intr_init(struct host1x *host) { unsigned int id; - u32 nb_pts = host1x_syncpt_nb_pts(host); mutex_init(&host->intr_mutex); - host->intr_syncpt_irq = irq_sync; - for (id = 0; id < nb_pts; ++id) { - struct host1x_syncpt *syncpt = host->syncpt + id; + for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { + struct host1x_syncpt *syncpt = &host->syncpt[id]; - spin_lock_init(&syncpt->intr.lock); - INIT_LIST_HEAD(&syncpt->intr.wait_head); - snprintf(syncpt->intr.thresh_irq_name, - sizeof(syncpt->intr.thresh_irq_name), - "host1x_sp_%02u", id); + spin_lock_init(&syncpt->fences.lock); + INIT_LIST_HEAD(&syncpt->fences.list); } return 0; @@ -310,8 +124,7 @@ void host1x_intr_start(struct host1x *host) int err; mutex_lock(&host->intr_mutex); - err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000), - syncpt_thresh_work); + err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000)); if (err) { mutex_unlock(&host->intr_mutex); return; @@ -321,36 +134,5 @@ void host1x_intr_start(struct host1x *host) void host1x_intr_stop(struct host1x *host) { - unsigned int id; - struct host1x_syncpt *syncpt = host->syncpt; - u32 nb_pts = host1x_syncpt_nb_pts(host); - - mutex_lock(&host->intr_mutex); - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (id = 0; id < nb_pts; ++id) { - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, - &syncpt[id].intr.wait_head, list) { - if (atomic_cmpxchg(&waiter->state, - WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } - } - - if (!list_empty(&syncpt[id].intr.wait_head)) { - /* output diagnostics */ - mutex_unlock(&host->intr_mutex); - pr_warn("%s cannot stop syncpt intr id=%u\n", - __func__, id); - return; - } - } - - host1x_hw_intr_free_syncpt_irq(host); - - mutex_unlock(&host->intr_mutex); } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index e4c346099273..3b5610b525e5 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -2,87 +2,17 @@ /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. + * Copyright (c) 2010-2021, NVIDIA Corporation. */ #ifndef __HOST1X_INTR_H #define __HOST1X_INTR_H -#include <linux/interrupt.h> -#include <linux/workqueue.h> - -struct host1x_syncpt; struct host1x; - -enum host1x_intr_action { - /* - * Perform cleanup after a submit has completed. - * 'data' points to a channel - */ - HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0, - - /* - * Wake up a task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP, - - /* - * Wake up a interruptible task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - - HOST1X_INTR_ACTION_SIGNAL_FENCE, - - HOST1X_INTR_ACTION_COUNT -}; - -struct host1x_syncpt_intr { - spinlock_t lock; - struct list_head wait_head; - char thresh_irq_name[12]; - struct work_struct work; -}; - -struct host1x_waitlist { - struct list_head list; - struct kref refcount; - u32 thresh; - enum host1x_intr_action action; - atomic_t state; - void *data; - int count; -}; - -/* - * Schedule an action to be taken when a sync point reaches the given threshold. - * - * @id the sync point - * @thresh the threshold - * @action the action to take - * @data a pointer to extra data depending on action, see above - * @waiter waiter structure - assumes ownership - * @ref must be passed if cancellation is possible, else NULL - * - * This is a non-blocking api. - */ -int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, - u32 thresh, enum host1x_intr_action action, - void *data, struct host1x_waitlist *waiter, - void **ref); - -/* - * Unreference an action submitted to host1x_intr_add_action(). - * You must call this if you passed non-NULL as ref. - * @ref the ref returned from host1x_intr_add_action() - * @flush wait until any pending handlers have completed before returning. - */ -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, - bool flush); +struct host1x_syncpt_fence; /* Initialize host1x sync point interrupt */ -int host1x_intr_init(struct host1x *host, unsigned int irq_sync); +int host1x_intr_init(struct host1x *host); /* Deinitialize host1x sync point interrupt */ void host1x_intr_deinit(struct host1x *host); @@ -93,5 +23,10 @@ void host1x_intr_start(struct host1x *host); /* Disable host1x sync point interrupt */ void host1x_intr_stop(struct host1x *host); -irqreturn_t host1x_syncpt_thresh_fn(void *dev_id); +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id); + +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence); + +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence); + #endif diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index b2761aa03b95..3ed49e1fd933 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -88,9 +88,15 @@ static void job_free(struct kref *ref) if (job->release) job->release(job); - if (job->waiter) - host1x_intr_put_ref(job->syncpt->host, job->syncpt->id, - job->waiter, false); + if (job->fence) { + /* + * remove_callback is atomic w.r.t. fence signaling, so + * after the call returns, we know that the callback is not + * in execution, and the fence can be safely freed. + */ + dma_fence_remove_callback(job->fence, &job->fence_cb); + dma_fence_put(job->fence); + } if (job->syncpt) host1x_syncpt_put(job->syncpt); diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index f87a8705f518..2d2007760eac 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/device.h> +#include <linux/dma-fence.h> #include <linux/slab.h> #include <trace/events/host1x.h> @@ -209,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp) } EXPORT_SYMBOL(host1x_syncpt_incr); -/* - * Updated sync point form hardware, and returns true if syncpoint is expired, - * false if we may need to wait - */ -static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) -{ - host1x_hw_syncpt_load(sp->host, sp); - - return host1x_syncpt_is_expired(sp, thresh); -} - /** * host1x_syncpt_wait() - wait for a syncpoint to reach a given value * @sp: host1x syncpoint @@ -230,10 +220,10 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - void *ref; - struct host1x_waitlist *waiter; - int err = 0, check_count = 0; + struct dma_fence *fence; + long wait_err; + + host1x_hw_syncpt_load(sp->host, sp); if (value) *value = host1x_syncpt_load(sp); @@ -241,73 +231,29 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, if (host1x_syncpt_is_expired(sp, thresh)) return 0; - if (!timeout) { - err = -EAGAIN; - goto done; - } - - /* allocate a waiter */ - waiter = kzalloc(sizeof(*waiter), GFP_KERNEL); - if (!waiter) { - err = -ENOMEM; - goto done; - } - - /* schedule a wakeup when the syncpoint value is reached */ - err = host1x_intr_add_action(sp->host, sp, thresh, - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - &wq, waiter, &ref); - if (err) - goto done; - - err = -EAGAIN; - /* Caller-specified timeout may be impractically low */ if (timeout < 0) timeout = LONG_MAX; + else if (timeout == 0) + return -EAGAIN; - /* wait for the syncpoint, or timeout, or signal */ - while (timeout) { - long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout); - int remain; - - remain = wait_event_interruptible_timeout(wq, - syncpt_load_min_is_expired(sp, thresh), - check); - if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); + fence = host1x_fence_create(sp, thresh, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); - err = 0; + wait_err = dma_fence_wait_timeout(fence, true, timeout); + if (wait_err == 0) + host1x_fence_cancel(fence); + dma_fence_put(fence); - break; - } - - if (remain < 0) { - err = remain; - break; - } - - timeout -= check; - - if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) { - dev_warn(sp->host->dev, - "%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n", - current->comm, sp->id, sp->name, - thresh, timeout); - - host1x_debug_dump_syncpts(sp->host); - - if (check_count == MAX_STUCK_CHECK_COUNT) - host1x_debug_dump(sp->host); - - check_count++; - } - } - - host1x_intr_put_ref(sp->host, sp->id, ref, true); + if (value) + *value = host1x_syncpt_load(sp); -done: - return err; + if (wait_err == 0) + return -EAGAIN; + else if (wait_err < 0) + return wait_err; + else + return 0; } EXPORT_SYMBOL(host1x_syncpt_wait); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 95cd29b79d6d..4c3f3b2f0e9c 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -14,6 +14,7 @@ #include <linux/kref.h> #include <linux/sched.h> +#include "fence.h" #include "intr.h" struct host1x; @@ -39,7 +40,7 @@ struct host1x_syncpt { struct host1x_syncpt_base *base; /* interrupt data */ - struct host1x_syncpt_intr intr; + struct host1x_fence_list fences; /* * If a submission incrementing this syncpoint fails, lock it so that diff --git a/include/linux/host1x.h b/include/linux/host1x.h index dc55d9d3b94f..9a9de4b97a25 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/dma-direction.h> +#include <linux/dma-fence.h> #include <linux/spinlock.h> #include <linux/types.h> @@ -221,7 +222,9 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); -struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout); +void host1x_fence_cancel(struct dma_fence *fence); /* * host1x channel @@ -288,8 +291,9 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; - /* Completion waiter ref */ - void *waiter; + /* Completion fence for job tracking */ + struct dma_fence *fence; + struct dma_fence_cb fence_cb; /* Maximum time to wait for this job */ unsigned int timeout; |