author     Sean Paul <seanpaul@chromium.org>    2019-10-23 11:14:11 -0400
committer  Sean Paul <seanpaul@chromium.org>    2019-10-23 11:14:11 -0400
commit     44bf67f32a6803339ac1ba721b158c3e2272cabe (patch)
tree       1ed93503a4a1d20005b85df91bd81e5f5982f348 /drivers
parent     a96bf3cbd7b8557f5c5c7938e5f8926ea39d55e9 (diff)
parent     2e79e22e092acd55da0b2db066e4826d7d152c41 (diff)
Merge drm/drm-next into drm-misc-next
Parroting Daniel's backmerge justification from
2e79e22e092acd55da0b2db066e4826d7d152c41:
Thierry needs fd70c7755bf0 ("drm/bridge: tc358767: fix max_tu_symbol
value") to be able to merge his dp_link patch series.
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers')
644 files changed, 23430 insertions, 15833 deletions
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3b2525908dd8..a1a858ad4d18 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -905,8 +905,8 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr)
 		pcc_data[pcc_ss_id]->refcount--;
 		if (!pcc_data[pcc_ss_id]->refcount) {
 			pcc_mbox_free_channel(pcc_data[pcc_ss_id]->pcc_channel);
-			pcc_data[pcc_ss_id]->pcc_channel_acquired = 0;
 			kfree(pcc_data[pcc_ss_id]);
+			pcc_data[pcc_ss_id] = NULL;
 		}
 	}
 }
diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c
index 8f9a28a870b0..8b0de8a3c647 100644
--- a/drivers/acpi/hmat/hmat.c
+++ b/drivers/acpi/hmat/hmat.c
@@ -403,7 +403,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
 	pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
 		p->flags, p->processor_PD, p->memory_PD);
 
-	if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) {
+	if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
 		target = find_mem_target(p->memory_PD);
 		if (!target) {
 			pr_debug("HMAT: Memory Domain missing from SRAT\n");
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 2261713d1aec..930a49fa4dfc 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -162,21 +162,23 @@ void acpi_processor_ppc_init(int cpu)
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 	int ret;
 
+	if (!pr)
+		return;
+
 	ret = dev_pm_qos_add_request(get_cpu_device(cpu), &pr->perflib_req,
 				     DEV_PM_QOS_MAX_FREQUENCY, INT_MAX);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
 		       ret);
-		return;
-	}
 }
 
 void acpi_processor_ppc_exit(int cpu)
 {
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 
-	dev_pm_qos_remove_request(&pr->perflib_req);
+	if (pr)
+		dev_pm_qos_remove_request(&pr->perflib_req);
 }
 
 static int acpi_processor_get_performance_control(struct acpi_processor *pr)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index ec2638f1df4f..8227c7dd75b1 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -130,21 +130,23 @@ void acpi_thermal_cpufreq_init(int cpu)
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 	int ret;
 
+	if (!pr)
+		return;
+
 	ret = dev_pm_qos_add_request(get_cpu_device(cpu), &pr->thermal_req,
 				     DEV_PM_QOS_MAX_FREQUENCY, INT_MAX);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
 		       ret);
-		return;
-	}
 }
 
 void acpi_thermal_cpufreq_exit(int cpu)
 {
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 
-	dev_pm_qos_remove_request(&pr->thermal_req);
+	if (pr)
+		dev_pm_qos_remove_request(&pr->thermal_req);
 }
 
 #else /* ! CONFIG_CPU_FREQ */
 static int cpufreq_get_max_state(unsigned int cpu)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 9fa77d72ef27..2af937a8b1c5 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -362,19 +362,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
 		},
 	},
 	/*
-	 * https://bugzilla.kernel.org/show_bug.cgi?id=196907
-	 * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power
-	 * S0 Idle firmware interface.
-	 */
-	{
-	.callback = init_default_s3,
-	.ident = "Dell XPS13 9360",
-	.matches = {
-		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
-		},
-	},
-	/*
 	 * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
 	 * the Low Power S0 Idle firmware interface (see
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=199057).
diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c0a491277aca..5b9ac2122e89 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -57,6 +57,7 @@ #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/seq_file.h> +#include <linux/string.h> #include <linux/uaccess.h> #include <linux/pid_namespace.h> #include <linux/security.h> @@ -66,6 +67,7 @@ #include <linux/task_work.h> #include <uapi/linux/android/binder.h> +#include <uapi/linux/android/binderfs.h> #include <asm/cacheflush.h> @@ -2876,7 +2878,7 @@ static void binder_transaction(struct binder_proc *proc, e->target_handle = tr->target.handle; e->data_size = tr->data_size; e->offsets_size = tr->offsets_size; - e->context_name = proc->context->name; + strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME); if (reply) { binder_inner_proc_lock(proc); diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 6d79a1b0d446..d42a8b2f636a 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -156,7 +156,7 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked( } /** - * binder_alloc_buffer_lookup() - get buffer given user ptr + * binder_alloc_prepare_to_free() - get buffer given user ptr * @alloc: binder_alloc for this proc * @user_ptr: User pointer to buffer data * diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index bd47f7f72075..ae991097d14d 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -130,7 +130,7 @@ struct binder_transaction_log_entry { int return_error_line; uint32_t return_error; uint32_t return_error_param; - const char *context_name; + char context_name[BINDERFS_MAX_NAME + 1]; }; struct binder_transaction_log { diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dd92faf197d5..05c2b32dcc4d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp */ if (!id || id->vendor != PCI_VENDOR_ID_INTEL) return; - if (((enum board_ids) id->driver_data) < board_ahci_pcs7) + + /* Skip applying the quirk on Denverton and beyond */ + if (((enum board_ids) id->driver_data) >= board_ahci_pcs7) return; /* diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 76d0f9de767b..58e09ffe8b9c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4791,27 +4791,6 @@ void ata_scsi_hotplug(struct work_struct *work) return; } - /* - * XXX - UGLY HACK - * - * The block layer suspend/resume path is fundamentally broken due - * to freezable kthreads and workqueue and may deadlock if a block - * device gets removed while resume is in progress. I don't know - * what the solution is short of removing freezable kthreads and - * workqueues altogether. - * - * The following is an ugly hack to avoid kicking off device - * removal while freezer is active. This is a joke but does avoid - * this particular deadlock scenario. 
- * - * https://bugzilla.kernel.org/show_bug.cgi?id=62801 - * http://marc.info/?l=linux-kernel&m=138695698516487 - */ -#ifdef CONFIG_FREEZER - while (pm_freezing) - msleep(10); -#endif - DPRINTK("ENTER\n"); mutex_lock(&ap->scsi_scan_mutex); diff --git a/drivers/base/core.c b/drivers/base/core.c index 2db62d98e395..7bd9cd366d41 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -9,6 +9,7 @@ */ #include <linux/acpi.h> +#include <linux/cpufreq.h> #include <linux/device.h> #include <linux/err.h> #include <linux/fwnode.h> @@ -3179,6 +3180,8 @@ void device_shutdown(void) wait_for_device_probe(); device_block_probing(); + cpufreq_suspend(); + spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 6bea4f3f8040..55907c27075b 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -540,6 +540,9 @@ static ssize_t soft_offline_page_store(struct device *dev, pfn >>= PAGE_SHIFT; if (!pfn_valid(pfn)) return -ENXIO; + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ + if (!pfn_to_online_page(pfn)) + return -EIO; ret = soft_offline_page(pfn_to_page(pfn), 0); return ret == 0 ? count : ret; } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index b6c6c7d97d5b..b230beb6ccb4 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -241,12 +241,8 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, } EXPORT_SYMBOL_GPL(platform_get_resource_byname); -/** - * platform_get_irq_byname - get an IRQ for a device by name - * @dev: platform device - * @name: IRQ name - */ -int platform_get_irq_byname(struct platform_device *dev, const char *name) +static int __platform_get_irq_byname(struct platform_device *dev, + const char *name) { struct resource *r; @@ -262,12 +258,48 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name) if (r) return r->start; - dev_err(&dev->dev, "IRQ %s not found\n", name); return -ENXIO; } + +/** + * platform_get_irq_byname - get an IRQ for a device by name + * @dev: platform device + * @name: IRQ name + * + * Get an IRQ like platform_get_irq(), but then by name rather then by index. + * + * Return: IRQ number on success, negative error number on failure. + */ +int platform_get_irq_byname(struct platform_device *dev, const char *name) +{ + int ret; + + ret = __platform_get_irq_byname(dev, name); + if (ret < 0 && ret != -EPROBE_DEFER) + dev_err(&dev->dev, "IRQ %s not found\n", name); + + return ret; +} EXPORT_SYMBOL_GPL(platform_get_irq_byname); /** + * platform_get_irq_byname_optional - get an optional IRQ for a device by name + * @dev: platform device + * @name: IRQ name + * + * Get an optional IRQ by name like platform_get_irq_byname(). Except that it + * does not print an error message if an IRQ can not be obtained. + * + * Return: IRQ number on success, negative error number on failure. 
+ */ +int platform_get_irq_byname_optional(struct platform_device *dev, + const char *name) +{ + return __platform_get_irq_byname(dev, name); +} +EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional); + +/** * platform_add_devices - add a numbers of platform devices * @devs: array of platform devices to add * @num: number of platform devices in array diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1410fa893653..f6f77eaa7217 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_write_cache(lo->lo_queue, true, false); + if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { + /* In case of direct I/O, match underlying block size */ + unsigned short bsize = bdev_logical_block_size( + inode->i_sb->s_bdev); + + blk_queue_logical_block_size(lo->lo_queue, bsize); + blk_queue_physical_block_size(lo->lo_queue, bsize); + blk_queue_io_min(lo->lo_queue, bsize); + } + loop_update_rotational(lo); loop_update_dio(lo); set_capacity(lo->lo_disk, size); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ac07e8c94c79..478aa86fc1f2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -248,8 +248,8 @@ static void nbd_put(struct nbd_device *nbd) if (refcount_dec_and_mutex_lock(&nbd->refs, &nbd_index_mutex)) { idr_remove(&nbd_index_idr, nbd->index); - mutex_unlock(&nbd_index_mutex); nbd_dev_remove(nbd); + mutex_unlock(&nbd_index_mutex); } } diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index eabc116832a7..3d7fdea872f8 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -142,8 +142,7 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector) zone->wp = zone->start; break; default: - cmd->error = BLK_STS_NOTSUPP; - break; + return BLK_STS_NOTSUPP; } return BLK_STS_OK; } diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 7c4350c0fb77..39136675dae5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6639,10 +6639,13 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait, ceph_timeout_jiffies(rbd_dev->opts->lock_timeout)); - if (ret > 0) + if (ret > 0) { ret = rbd_dev->acquire_err; - else if (!ret) - ret = -ETIMEDOUT; + } else { + cancel_delayed_work_sync(&rbd_dev->lock_dwork); + if (!ret) + ret = -ETIMEDOUT; + } if (ret) { rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d58a359a6622..4285e75e52c3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -413,13 +413,14 @@ static void reset_bdev(struct zram *zram) static ssize_t backing_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct file *file; struct zram *zram = dev_to_zram(dev); - struct file *file = zram->backing_dev; char *p; ssize_t ret; down_read(&zram->init_lock); - if (!zram->backing_dev) { + file = zram->backing_dev; + if (!file) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; diff --git a/drivers/char/random.c b/drivers/char/random.c index c2f7de9dc543..de434feb873a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2520,4 +2520,4 @@ void add_bootloader_randomness(const void *buf, unsigned int size) else add_device_randomness(buf, size); 
} -EXPORT_SYMBOL_GPL(add_bootloader_randomness);
\ No newline at end of file +EXPORT_SYMBOL_GPL(add_bootloader_randomness); diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index b57fe09b428b..9dd6185a4b4e 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -683,7 +683,7 @@ static const struct omap_clkctrl_reg_data dra7_l4per2_clkctrl_regs[] __initconst { DRA7_L4PER2_MCASP2_CLKCTRL, dra7_mcasp2_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0154:22" }, { DRA7_L4PER2_MCASP3_CLKCTRL, dra7_mcasp3_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:015c:22" }, { DRA7_L4PER2_MCASP5_CLKCTRL, dra7_mcasp5_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:016c:22" }, - { DRA7_L4PER2_MCASP8_CLKCTRL, dra7_mcasp8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0184:24" }, + { DRA7_L4PER2_MCASP8_CLKCTRL, dra7_mcasp8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:0184:22" }, { DRA7_L4PER2_MCASP4_CLKCTRL, dra7_mcasp4_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:018c:22" }, { DRA7_L4PER2_UART7_CLKCTRL, dra7_uart7_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:01c4:24" }, { DRA7_L4PER2_UART8_CLKCTRL, dra7_uart8_bit_data, CLKF_SW_SUP, "l4per2-clkctrl:01d4:24" }, @@ -828,8 +828,8 @@ static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "mcasp6_aux_gfclk_mux", "l4per2-clkctrl:01f8:22"), DT_CLK(NULL, "mcasp7_ahclkx_mux", "l4per2-clkctrl:01fc:24"), DT_CLK(NULL, "mcasp7_aux_gfclk_mux", "l4per2-clkctrl:01fc:22"), - DT_CLK(NULL, "mcasp8_ahclkx_mux", "l4per2-clkctrl:0184:22"), - DT_CLK(NULL, "mcasp8_aux_gfclk_mux", "l4per2-clkctrl:0184:24"), + DT_CLK(NULL, "mcasp8_ahclkx_mux", "l4per2-clkctrl:0184:24"), + DT_CLK(NULL, "mcasp8_aux_gfclk_mux", "l4per2-clkctrl:0184:22"), DT_CLK(NULL, "mmc1_clk32k", "l3init-clkctrl:0008:8"), DT_CLK(NULL, "mmc1_fclk_div", "l3init-clkctrl:0008:25"), DT_CLK(NULL, "mmc1_fclk_mux", "l3init-clkctrl:0008:24"), diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index d8c2bd4391d0..11ff701ff4bb 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -25,7 +25,9 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) struct clock_event_device *clkevt = &to->clkevt; - of_irq->percpu ? free_percpu_irq(of_irq->irq, clkevt) : + if (of_irq->percpu) + free_percpu_irq(of_irq->irq, clkevt); + else free_irq(of_irq->irq, clkevt); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c52d6fa32aac..bffc11b87247 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2737,14 +2737,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); -/* - * Stop cpufreq at shutdown to make sure it isn't holding any locks - * or mutexes when secondary CPUs are halted. 
- */ -static struct syscore_ops cpufreq_syscore_ops = { - .shutdown = cpufreq_suspend, -}; - struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); @@ -2756,8 +2748,6 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); - register_syscore_ops(&cpufreq_syscore_ops); - return 0; } module_param(off, int, 0444); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 35ed56b9c34f..1e21fc3e9851 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -408,7 +408,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v) bytes = ~0ull; else if (size & 0x8000) bytes = (u64)(size & 0x7fff) << 10; - else if (size != 0x7fff) + else if (size != 0x7fff || dm->length < 0x20) bytes = (u64)size << 20; else bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20; diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index addf0749dd8b..b1af0de2e100 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -381,7 +381,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, pcie->device_id.vendor_id, pcie->device_id.device_id); p = pcie->device_id.class_code; - printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); + printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]); } if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8d3e778e988b..69f00f7453a3 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -267,6 +267,9 @@ static __init int efivar_ssdt_load(void) void *data; int ret; + if (!efivar_ssdt[0]) + return 0; + ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries); list_for_each_entry_safe(entry, aux, &entries, list) { diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c index 3e290f96620a..76b0c354a027 100644 --- a/drivers/firmware/efi/rci2-table.c +++ b/drivers/firmware/efi/rci2-table.c @@ -76,7 +76,7 @@ static u16 checksum(void) return chksum; } -int __init efi_rci2_sysfs_init(void) +static int __init efi_rci2_sysfs_init(void) { struct kobject *tables_kobj; int ret = -ENOMEM; diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 1d3f5ca3eaaf..ebd7977653a8 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -40,7 +40,7 @@ int __init efi_tpm_eventlog_init(void) { struct linux_efi_tpm_eventlog *log_tbl; struct efi_tcg2_final_events_table *final_tbl; - unsigned int tbl_size; + int tbl_size; int ret = 0; if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) { @@ -75,16 +75,28 @@ int __init efi_tpm_eventlog_init(void) goto out; } - tbl_size = tpm2_calc_event_log_size((void *)efi.tpm_final_log - + sizeof(final_tbl->version) - + sizeof(final_tbl->nr_events), - final_tbl->nr_events, - log_tbl->log); + tbl_size = 0; + if (final_tbl->nr_events != 0) { + void *events = (void *)efi.tpm_final_log + + sizeof(final_tbl->version) + + sizeof(final_tbl->nr_events); + + tbl_size = tpm2_calc_event_log_size(events, + final_tbl->nr_events, + log_tbl->log); + } + + if (tbl_size < 0) { + pr_err(FW_BUG "Failed to parse event in TPM Final Events Log\n"); + goto out_calc; + } + memblock_reserve((unsigned long)final_tbl, tbl_size + sizeof(*final_tbl)); - 
early_memunmap(final_tbl, sizeof(*final_tbl)); efi_tpm_final_log_size = tbl_size; +out_calc: + early_memunmap(final_tbl, sizeof(*final_tbl)); out: early_memunmap(log_tbl, sizeof(*log_tbl)); return ret; diff --git a/drivers/firmware/google/vpd_decode.c b/drivers/firmware/google/vpd_decode.c index dda525c0f968..5c6f2a74f104 100644 --- a/drivers/firmware/google/vpd_decode.c +++ b/drivers/firmware/google/vpd_decode.c @@ -52,7 +52,7 @@ static int vpd_decode_entry(const u32 max_len, const u8 *input_buf, if (max_len - consumed < *entry_len) return VPD_FAIL; - consumed += decoded_len; + consumed += *entry_len; *_consumed = consumed; return VPD_OK; } diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c index fe7a73f52329..bb287f35cf40 100644 --- a/drivers/gpio/gpio-eic-sprd.c +++ b/drivers/gpio/gpio-eic-sprd.c @@ -530,11 +530,12 @@ static void sprd_eic_handle_one_type(struct gpio_chip *chip) } for_each_set_bit(n, ®, SPRD_EIC_PER_BANK_NR) { - girq = irq_find_mapping(chip->irq.domain, - bank * SPRD_EIC_PER_BANK_NR + n); + u32 offset = bank * SPRD_EIC_PER_BANK_NR + n; + + girq = irq_find_mapping(chip->irq.domain, offset); generic_handle_irq(girq); - sprd_eic_toggle_trigger(chip, girq, n); + sprd_eic_toggle_trigger(chip, girq, offset); } } } diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c index 4d835f9089df..86a10c808ef6 100644 --- a/drivers/gpio/gpio-intel-mid.c +++ b/drivers/gpio/gpio-intel-mid.c @@ -293,8 +293,9 @@ static void intel_mid_irq_handler(struct irq_desc *desc) chip->irq_eoi(data); } -static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv) +static int intel_mid_irq_init_hw(struct gpio_chip *chip) { + struct intel_mid_gpio *priv = gpiochip_get_data(chip); void __iomem *reg; unsigned base; @@ -309,6 +310,8 @@ static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv) reg = gpio_reg(&priv->chip, base, GEDR); writel(~0, reg); } + + return 0; } static int __maybe_unused intel_gpio_runtime_idle(struct device *dev) @@ -372,6 +375,7 @@ static int intel_gpio_probe(struct pci_dev *pdev, girq = &priv->chip.irq; girq->chip = &intel_mid_irqchip; + girq->init_hw = intel_mid_irq_init_hw; girq->parent_handler = intel_mid_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -384,9 +388,8 @@ static int intel_gpio_probe(struct pci_dev *pdev, girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_simple_irq; - intel_mid_irq_init_hw(priv); - pci_set_drvdata(pdev, priv); + retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { dev_err(&pdev->dev, "gpiochip_add error %d\n", retval); diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 6bb9741ad036..e9e47c0d5be7 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -294,8 +294,9 @@ static struct irq_chip lp_irqchip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void lp_gpio_irq_init_hw(struct lp_gpio *lg) +static int lp_gpio_irq_init_hw(struct gpio_chip *chip) { + struct lp_gpio *lg = gpiochip_get_data(chip); unsigned long reg; unsigned base; @@ -307,6 +308,8 @@ static void lp_gpio_irq_init_hw(struct lp_gpio *lg) reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT); outl(0xffffffff, reg); } + + return 0; } static int lp_gpio_probe(struct platform_device *pdev) @@ -364,6 +367,7 @@ static int lp_gpio_probe(struct platform_device *pdev) girq = &gc->irq; girq->chip = &lp_irqchip; + girq->init_hw = lp_gpio_irq_init_hw; girq->parent_handler = lp_gpio_irq_handler; girq->num_parents = 1; 
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -373,9 +377,7 @@ static int lp_gpio_probe(struct platform_device *pdev) return -ENOMEM; girq->parents[0] = (unsigned)irq_rc->start; girq->default_type = IRQ_TYPE_NONE; - girq->handler = handle_simple_irq; - - lp_gpio_irq_init_hw(lg); + girq->handler = handle_bad_irq; } ret = devm_gpiochip_add_data(dev, gc, lg); diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index 47d05e357e61..faf86ea9c51a 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -192,13 +192,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio, case 0: val = MAX77620_CNFG_GPIO_DBNC_None; break; - case 1 ... 8: + case 1000 ... 8000: val = MAX77620_CNFG_GPIO_DBNC_8ms; break; - case 9 ... 16: + case 9000 ... 16000: val = MAX77620_CNFG_GPIO_DBNC_16ms; break; - case 17 ... 32: + case 17000 ... 32000: val = MAX77620_CNFG_GPIO_DBNC_32ms; break; default: diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 4f27ddfe1e2f..2f1e9da81c1e 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -362,8 +362,9 @@ static void mrfld_irq_handler(struct irq_desc *desc) chained_irq_exit(irqchip, desc); } -static void mrfld_irq_init_hw(struct mrfld_gpio *priv) +static int mrfld_irq_init_hw(struct gpio_chip *chip) { + struct mrfld_gpio *priv = gpiochip_get_data(chip); void __iomem *reg; unsigned int base; @@ -375,6 +376,8 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv) reg = gpio_reg(&priv->chip, base, GFER); writel(0, reg); } + + return 0; } static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv) @@ -447,6 +450,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id girq = &priv->chip.irq; girq->chip = &mrfld_irqchip; + girq->init_hw = mrfld_irq_init_hw; girq->parent_handler = mrfld_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents, @@ -455,11 +459,10 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id if (!girq->parents) return -ENOMEM; girq->parents[0] = pdev->irq; + girq->first = irq_base; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; - mrfld_irq_init_hw(priv); - pci_set_drvdata(pdev, priv); retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv); if (retval) { diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 1eea2c6c2e1d..80ea49f570f4 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -317,7 +317,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node, transitory = flags & OF_GPIO_TRANSITORY; ret = gpiod_request(desc, label); - if (ret == -EBUSY && (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) + if (ret == -EBUSY && (dflags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) return desc; if (ret) return ERR_PTR(ret); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bdbc1649eafa..104ed299d5ea 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -86,6 +86,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, struct lock_class_key *lock_key, struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); +static int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1406,6 +1407,10 @@ int gpiochip_add_data_with_key(struct 
gpio_chip *chip, void *data, machine_gpiochip_add(chip); + ret = gpiochip_irqchip_init_hw(chip); + if (ret) + goto err_remove_acpi_chip; + ret = gpiochip_irqchip_init_valid_mask(chip); if (ret) goto err_remove_acpi_chip; @@ -1622,6 +1627,16 @@ static struct gpio_chip *find_chip_by_name(const char *name) * The following is irqchip helper code for gpiochips. */ +static int gpiochip_irqchip_init_hw(struct gpio_chip *gc) +{ + struct gpio_irq_chip *girq = &gc->irq; + + if (!girq->init_hw) + return 0; + + return girq->init_hw(gc); +} + static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; @@ -2446,8 +2461,13 @@ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, { return 0; } - static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {} + +static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip) +{ + return 0; +} + static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip) { return 0; @@ -3070,8 +3090,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) if (!ret) goto set_output_value; /* Emulate open drain by not actively driving the line high */ - if (value) - return gpiod_direction_input(desc); + if (value) { + ret = gpiod_direction_input(desc); + goto set_output_flag; + } } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) { ret = gpio_set_config(gc, gpio_chip_hwgpio(desc), @@ -3079,8 +3101,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) if (!ret) goto set_output_value; /* Emulate open source by not actively driving the line low */ - if (!value) - return gpiod_direction_input(desc); + if (!value) { + ret = gpiod_direction_input(desc); + goto set_output_flag; + } } else { gpio_set_config(gc, gpio_chip_hwgpio(desc), PIN_CONFIG_DRIVE_PUSH_PULL); @@ -3088,6 +3112,17 @@ int gpiod_direction_output(struct gpio_desc *desc, int value) set_output_value: return gpiod_direction_output_raw_commit(desc, value); + +set_output_flag: + /* + * When emulating open-source or open-drain functionalities by not + * actively driving the line (setting mode to input) we still need to + * set the IS_OUT flag or otherwise we won't be able to set the line + * value anymore. 
+ */ + if (ret == 0) + set_bit(FLAG_IS_OUT, &desc->flags); + return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_output); @@ -3448,8 +3483,6 @@ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value) if (value) { ret = chip->direction_input(chip, offset); - if (!ret) - clear_bit(FLAG_IS_OUT, &desc->flags); } else { ret = chip->direction_output(chip, offset, 0); if (!ret) @@ -3479,8 +3512,6 @@ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value set_bit(FLAG_IS_OUT, &desc->flags); } else { ret = chip->direction_input(chip, offset); - if (!ret) - clear_bit(FLAG_IS_OUT, &desc->flags); } trace_gpio_direction(desc_to_gpio(desc), !value, ret); if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 42e2c1f57152..00962a659009 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o + amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index eba42c752bca..82155ac3288a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -189,7 +189,7 @@ static int acp_hw_init(void *handle) u32 val = 0; u32 count = 0; struct device *dev; - struct i2s_platform_data *i2s_pdata; + struct i2s_platform_data *i2s_pdata = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -231,20 +231,21 @@ static int acp_hw_init(void *handle) adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - if (adev->acp.acp_cell == NULL) - return -ENOMEM; + if (adev->acp.acp_cell == NULL) { + r = -ENOMEM; + goto failure; + } adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); if (adev->acp.acp_res == NULL) { - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); if (i2s_pdata == NULL) { - kfree(adev->acp.acp_res); - kfree(adev->acp.acp_cell); - return -ENOMEM; + r = -ENOMEM; + goto failure; } switch (adev->asic_type) { @@ -341,14 +342,14 @@ static int acp_hw_init(void *handle) r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); if (r) - return r; + goto failure; for (i = 0; i < ACP_DEVS ; i++) { dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); if (r) { dev_err(dev, "Failed to add dev to genpd\n"); - return r; + goto failure; } } @@ -367,7 +368,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -384,7 +386,8 @@ static int acp_hw_init(void *handle) break; if (--count == 0) { dev_err(&adev->pdev->dev, "Failed to reset ACP\n"); - return -ETIMEDOUT; + r = -ETIMEDOUT; + goto failure; } udelay(100); } @@ -393,6 +396,13 @@ static int acp_hw_init(void *handle) val &= ~ACP_SOFT_RESET__SoftResetAud_MASK; cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val); return 0; + +failure: + 
kfree(i2s_pdata); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + kfree(adev->acp.acp_genpd); + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 7bcf86c61999..61e38e43ad1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -270,7 +270,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_create_list_entry_array(&args->in, &info); if (r) - goto error_free; + return r; switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: @@ -283,8 +283,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - amdgpu_bo_list_put(list); - return r; + goto error_put_list; } handle = r; @@ -306,9 +305,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, mutex_unlock(&fpriv->bo_list_lock); if (IS_ERR(old)) { - amdgpu_bo_list_put(list); r = PTR_ERR(old); - goto error_free; + goto error_put_list; } amdgpu_bo_list_put(old); @@ -325,8 +323,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, return 0; +error_put_list: + amdgpu_bo_list_put(list); + error_free: - if (info) - kvfree(info); + kvfree(info); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3fae1007143e..4da1d7fb10f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -81,9 +81,10 @@ * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches + * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 34 +#define KMS_DRIVER_MINOR 35 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 @@ -1047,6 +1048,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb"); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 554a59b3c4a6..6ee4021910e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -165,6 +165,7 @@ struct amdgpu_gfx_config { uint32_t num_sc_per_sh; uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; + uint64_t tcc_disabled_mask; }; struct amdgpu_cu_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index f6147528be64..d55f5baa83d3 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -144,41 +144,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; @@ -787,6 +752,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; + dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e2fb141ff2e5..5251352f5922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -603,14 +603,12 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base; -#if 0 if (vm->bulk_moveable) { spin_lock(&glob->lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); spin_unlock(&glob->lru_lock); return; } -#endif memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 638c821611ab..957811b73672 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1691,6 +1691,17 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } +static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) +{ + /* TCCs are global (not instanced). 
*/ + uint32_t tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); + + adev->gfx.config.tcc_disabled_mask = + REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | + (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); +} + static void gfx_v10_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1702,6 +1713,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + gfx_v10_0_get_tcc_info(adev); adev->gfx.config.pa_sc_tile_steering_override = gfx_v10_0_init_pa_sc_tile_steering_override(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 85393a99a848..de9b995b65b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -317,10 +317,12 @@ static int nv_asic_reset(struct amdgpu_device *adev) struct smu_context *smu = &adev->smu; if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = smu_baco_reset(smu); } else { - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa2f70ce2e2b..f6e81680dd7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1129,7 +1129,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xfffffff); /* mask */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f70658a536a9..f8ab80c8801b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -558,12 +558,14 @@ static int soc15_asic_reset(struct amdgpu_device *adev) { switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_BACO: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_baco_reset(adev); case AMD_RESET_METHOD_MODE2: return soc15_mode2_reset(adev); default: - amdgpu_inc_vram_lost(adev); + if (!adev->in_suspend) + amdgpu_inc_vram_lost(adev); return soc15_asic_mode1_reset(adev); } } @@ -771,8 +773,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); break; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f4e0f27a76de..c67d3c41db19 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2385,8 +2385,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) dm->dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? 
false : true; - if (adev->asic_type == CHIP_RENOIR) - dm->dc->debug.disable_stutter = true; return 0; fail: @@ -6025,7 +6023,9 @@ static void amdgpu_dm_enable_crtc_interrupts(struct drm_device *dev, struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; +#ifdef CONFIG_DEBUG_FS enum amdgpu_dm_pipe_crc_source source; +#endif for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 1787b9bf800a..76d54885374a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -668,6 +668,7 @@ struct clock_source *dce100_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 318e9c2e2ca8..89620adc81d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -714,6 +714,7 @@ struct clock_source *dce110_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 83e1878161c9..21a657e79306 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -687,6 +687,7 @@ struct clock_source *dce112_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 8b85e5274bba..7c52f7f9196c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -500,6 +500,7 @@ static struct clock_source *dce120_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 4625df9f9fd2..643ccb0ade00 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -701,6 +701,7 @@ struct clock_source *dce80_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 59305e411a66..1599bb971111 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -786,6 +786,7 @@ struct clock_source *dcn10_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b4e3ce22ed52..5a2763daff4d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1077,6 +1077,7 @@ struct clock_source *dcn20_clock_source_create( return &clk_src->base; } + kfree(clk_src); BREAK_TO_DEBUGGER(); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile 
b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 8cd9de8b1a7a..ef673bffc241 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,7 +3,17 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -mpreferred-stack-boundary=4 +ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) + cc_stack_align := -mpreferred-stack-boundary=4 +else ifneq ($(call cc-option, -mstack-alignment=16),) + cc_stack_align := -mstack-alignment=16 +endif + +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) + +ifdef CONFIG_CC_IS_CLANG +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 +endif AMD_DAL_DCN21 = $(addprefix $(AMDDALPATH)/dc/dcn21/,$(DCN21)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index 456cd0e3289c..3b6ed60dcd35 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -39,9 +39,6 @@ * ways. Unless there is something clearly wrong with it the code should * remain as-is as it provides us with a guarantee from HW that it is correct. */ - -typedef unsigned int uint; - typedef struct { double DPPCLK; double DISPCLK; @@ -4774,7 +4771,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0; mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0; for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) { - uint m; + unsigned int m; locals->cursor_bw[k] = 0; locals->cursor_bw_pre[k] = 0; @@ -5285,7 +5282,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; double FullDETBufferingTimeYStutterCriticalPlane = 0; double TimeToFinishSwathTransferStutterCriticalPlane = 0; - uint k, j; + unsigned int k, j; mode_lib->vba.TotalActiveDPP = 0; mode_lib->vba.TotalDCCActiveDPP = 0; @@ -5507,7 +5504,7 @@ static void CalculateDCFCLKDeepSleep( double DPPCLK[], double *DCFCLKDeepSleep) { - uint k; + unsigned int k; double DisplayPipeLineDeliveryTimeLuma; double DisplayPipeLineDeliveryTimeChroma; //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX]; @@ -5727,7 +5724,7 @@ static void CalculatePixelDeliveryTimes( double DisplayPipeRequestDeliveryTimeChromaPrefetch[]) { double req_per_swath_ub; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (VRatio[k] <= 1) { @@ -5869,7 +5866,7 @@ static void CalculateMetaAndPTETimes( unsigned int dpte_groups_per_row_chroma_ub; unsigned int num_group_per_lower_vm_stage; unsigned int num_req_per_lower_vm_stage; - uint k; + unsigned int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (GPUVMEnable == true) { diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 33960fb38a5d..4acf139ea014 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -843,6 +843,8 @@ static int smu_sw_init(void *handle) smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; + mutex_init(&smu->sensor_lock); + smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c 
b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index f1f072012fac..d493a3f8c07a 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1018,6 +1018,7 @@ static int arcturus_read_sensor(struct smu_context *smu, if (!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1044,6 +1045,7 @@ static int arcturus_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 6109815a0401..23171a4d9a31 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -344,6 +344,7 @@ struct smu_context const struct smu_funcs *funcs; const struct pptable_funcs *ppt_funcs; struct mutex mutex; + struct mutex sensor_lock; uint64_t pool_size; struct smu_table_context smu_table; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 12c0e469bf35..0b461404af6b 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -547,7 +547,7 @@ static int navi10_get_metrics_table(struct smu_context *smu, struct smu_table_context *smu_table= &smu->smu_table; int ret = 0; - if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) { + if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) { ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)smu_table->metrics_table, false); if (ret) { @@ -1386,6 +1386,7 @@ static int navi10_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -1409,6 +1410,7 @@ static int navi10_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index dc754447f0dd..23c12018dbc1 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -655,7 +655,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, count = SMU_MAX_SMIO_LEVELS; for (level = 0; level < count; level++) { table->SmioTable2.Pattern[level].Voltage = - PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE); /* Index into DpmTable.Smio. 
Drive bits from Smio entry to get this voltage level.*/ table->SmioTable2.Pattern[level].Smio = (uint8_t) level; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 7c960b07746f..ae18fbcb26fb 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -456,7 +456,7 @@ static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, count = SMU_MAX_SMIO_LEVELS; for (level = 0; level < count; level++) { table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US( - data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE); /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ table->SmioTable2.Pattern[level].Smio = (uint8_t) level; diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 64386ee3f878..bbd8ebd58434 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -3023,6 +3023,7 @@ static int vega20_read_sensor(struct smu_context *smu, if(!data || !size) return -EINVAL; + mutex_lock(&smu->sensor_lock); switch (sensor) { case AMDGPU_PP_SENSOR_MAX_FAN_RPM: *(uint32_t *)data = pptable->FanMaximumRpm; @@ -3048,6 +3049,7 @@ static int vega20_read_sensor(struct smu_context *smu, default: ret = smu_smc_read_sensor(smu, sensor, data, size); } + mutex_unlock(&smu->sensor_lock); return ret; } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index abfa587db189..e465cc4879c9 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -43,9 +43,8 @@ komeda_wb_encoder_atomic_check(struct drm_encoder *encoder, struct komeda_data_flow_cfg dflow; int err; - if (!writeback_job || !writeback_job->fb) { + if (!writeback_job) return 0; - } if (!crtc_st->active) { DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n"); @@ -167,8 +166,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, &komeda_wb_encoder_helper_funcs, formats, n_formats); komeda_put_fourcc_list(formats); - if (err) + if (err) { + kfree(kwb_conn); return err; + } drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c index 22c0847986df..875a3a9eabfa 100644 --- a/drivers/gpu/drm/arm/malidp_mw.c +++ b/drivers/gpu/drm/arm/malidp_mw.c @@ -131,7 +131,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder, struct drm_framebuffer *fb; int i, n_planes; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -248,7 +248,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm, mw_state = to_mw_state(conn_state); - if (conn_state->writeback_job && conn_state->writeback_job->fb) { + if (conn_state->writeback_job) { struct drm_framebuffer *fb = conn_state->writeback_job->fb; DRM_DEV_DEBUG_DRIVER(drm->dev, diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 51664a2df731..da7e35b0893d 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -729,6 +729,8 @@ static int tc_set_video_mode(struct tc_data *tc, int lower_margin = mode->vsync_start - 
mode->vdisplay; int vsync_len = mode->vsync_end - mode->vsync_start; u32 dp0_syncval; + u32 bits_per_pixel = 24; + u32 in_bw, out_bw; /* * Recommended maximum number of symbols transferred in a transfer unit: @@ -736,7 +738,10 @@ static int tc_set_video_mode(struct tc_data *tc, * (output active video bandwidth in bytes)) * Must be less than tu_size. */ - max_tu_symbol = TU_SIZE_RECOMMENDED - 1; + + in_bw = mode->clock * bits_per_pixel / 8; + out_bw = tc->link.base.num_lanes * tc->link.base.rate; + max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw); dev_dbg(tc->dev, "set mode %dx%d\n", mode->hdisplay, mode->vdisplay); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 419381abbdd1..14aeaf736321 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -430,10 +430,15 @@ static int drm_atomic_connector_check(struct drm_connector *connector, return -EINVAL; } - if (writeback_job->out_fence && !writeback_job->fb) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", - connector->base.id, connector->name); - return -EINVAL; + if (!writeback_job->fb) { + if (writeback_job->out_fence) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] requesting out-fence without framebuffer\n", + connector->base.id, connector->name); + return -EINVAL; + } + + drm_writeback_cleanup_job(writeback_job); + state->writeback_job = NULL; } return 0; diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 834cb7c930e7..474ac04d5600 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -159,6 +159,9 @@ static const struct edid_quirk { /* Medion MD 30217 PG */ { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 }, + /* Lenovo G50 */ + { "SDC", 18514, EDID_QUIRK_FORCE_6BPC }, + /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c index ff138b6ec48b..43d9e3bb3a94 100644 --- a/drivers/gpu/drm/drm_writeback.c +++ b/drivers/gpu/drm/drm_writeback.c @@ -324,6 +324,9 @@ void drm_writeback_cleanup_job(struct drm_writeback_job *job) if (job->fb) drm_framebuffer_put(job->fb); + if (job->out_fence) + dma_fence_put(job->out_fence); + kfree(job); } EXPORT_SYMBOL(drm_writeback_cleanup_job); @@ -366,25 +369,29 @@ drm_writeback_signal_completion(struct drm_writeback_connector *wb_connector, { unsigned long flags; struct drm_writeback_job *job; + struct dma_fence *out_fence; spin_lock_irqsave(&wb_connector->job_lock, flags); job = list_first_entry_or_null(&wb_connector->job_queue, struct drm_writeback_job, list_entry); - if (job) { + if (job) list_del(&job->list_entry); - if (job->out_fence) { - if (status) - dma_fence_set_error(job->out_fence, status); - dma_fence_signal(job->out_fence); - dma_fence_put(job->out_fence); - } - } + spin_unlock_irqrestore(&wb_connector->job_lock, flags); if (WARN_ON(!job)) return; + out_fence = job->out_fence; + if (out_fence) { + if (status) + dma_fence_set_error(out_fence, status); + dma_fence_signal(out_fence); + dma_fence_put(out_fence); + job->out_fence = NULL; + } + INIT_WORK(&job->cleanup_work, cleanup_work); queue_work(system_long_wq, &job->cleanup_work); } diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 0d21402945ab..3c6d57df262d 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -76,7 +76,7 @@ config DRM_I915_CAPTURE_ERROR This option enables capturing the GPU state when a hang is detected. 
This information is vital for triaging hangs and assists in debugging. Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI + https://bugs.freedesktop.org/enter_bug.cgi?product=DRI for triaging. If in doubt, say "Y". @@ -105,11 +105,11 @@ config DRM_I915_USERPTR If in doubt, say "Y". config DRM_I915_GVT - bool "Enable Intel GVT-g graphics virtualization host support" - depends on DRM_I915 - depends on 64BIT - default n - help + bool "Enable Intel GVT-g graphics virtualization host support" + depends on DRM_I915 + depends on 64BIT + default n + help Choose this option if you want to enable Intel GVT-g graphics virtualization technology host support with integrated graphics. With GVT-g, it's possible to have one integrated graphics diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 00786a142ff0..eea79125b3ea 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -1,34 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_I915_WERROR - bool "Force GCC to throw an error instead of a warning when compiling" - # As this may inadvertently break the build, only allow the user - # to shoot oneself in the foot iff they aim really hard - depends on EXPERT - # We use the dependency on !COMPILE_TEST to not be enabled in - # allmodconfig or allyesconfig configurations - depends on !COMPILE_TEST + bool "Force GCC to throw an error instead of a warning when compiling" + # As this may inadvertently break the build, only allow the user + # to shoot oneself in the foot iff they aim really hard + depends on EXPERT + # We use the dependency on !COMPILE_TEST to not be enabled in + # allmodconfig or allyesconfig configurations + depends on !COMPILE_TEST select HEADER_TEST - default n - help - Add -Werror to the build flags for (and only for) i915.ko. - Do not enable this unless you are writing code for the i915.ko module. + default n + help + Add -Werror to the build flags for (and only for) i915.ko. + Do not enable this unless you are writing code for the i915.ko module. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG - bool "Enable additional driver debugging" - depends on DRM_I915 - select DEBUG_FS - select PREEMPT_COUNT - select REFCOUNT_FULL - select I2C_CHARDEV - select STACKDEPOT - select DRM_DP_AUX_CHARDEV - select X86_MSR # used by igt/pm_rpm - select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) - select DRM_DEBUG_MM if DRM=y + bool "Enable additional driver debugging" + depends on DRM_I915 + select DEBUG_FS + select PREEMPT_COUNT + select REFCOUNT_FULL + select I2C_CHARDEV + select STACKDEPOT + select DRM_DP_AUX_CHARDEV + select X86_MSR # used by igt/pm_rpm + select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) + select DRM_DEBUG_MM if DRM=y select DRM_DEBUG_SELFTEST select DMABUF_SELFTESTS select SW_SYNC # signaling validation framework (igt/syncobj*) @@ -36,14 +36,14 @@ config DRM_I915_DEBUG select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". 
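The drm_writeback change above signals the out-fence outside the job-queue spinlock and clears job->out_fence before the job is handed to the cleanup worker, so the dma_fence_put() newly added to drm_writeback_cleanup_job() cannot drop the same reference twice. A minimal userspace model of that ownership hand-off, with toy stand-ins for the fence and job types (nothing below is driver code):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct fence { int refcount; };
struct job   { struct fence *out_fence; };

static void fence_put(struct fence *f)
{
        assert(f->refcount > 0);        /* a second put would trip this */
        if (--f->refcount == 0)
                free(f);
}

/* Mirrors the cleanup path: release the fence only if the job still owns it. */
static void cleanup_job(struct job *job)
{
        if (job->out_fence)
                fence_put(job->out_fence);
        free(job);
}

static void signal_completion(struct job *job)
{
        struct fence *out_fence = job->out_fence;

        if (out_fence) {
                /* ...signal the fence... */
                fence_put(out_fence);
                job->out_fence = NULL;  /* keeps cleanup_job() from a double put */
        }
        cleanup_job(job);               /* queued to a worker in the real driver */
}

int main(void)
{
        struct job *job = calloc(1, sizeof(*job));

        job->out_fence = calloc(1, sizeof(struct fence));
        job->out_fence->refcount = 1;
        signal_completion(job);
        puts("fence released exactly once");
        return 0;
}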
+ If in doubt, say "N". config DRM_I915_DEBUG_MMIO bool "Always insert extra checks around mmio access by default" @@ -59,16 +59,16 @@ config DRM_I915_DEBUG_MMIO If in doubt, say "N". config DRM_I915_DEBUG_GEM - bool "Insert extra checks into the GEM internals" - default n - depends on DRM_I915_WERROR - help - Enable extra sanity checks (including BUGs) along the GEM driver - paths that may slow the system down and if hit hang the machine. + bool "Insert extra checks into the GEM internals" + default n + depends on DRM_I915_WERROR + help + Enable extra sanity checks (including BUGs) along the GEM driver + paths that may slow the system down and if hit hang the machine. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_ERRLOG_GEM bool "Insert extra logging (very verbose) for common GEM errors" @@ -111,41 +111,41 @@ config DRM_I915_TRACE_GTT If in doubt, say "N". config DRM_I915_SW_FENCE_DEBUG_OBJECTS - bool "Enable additional driver debugging for fence objects" - depends on DRM_I915 - select DEBUG_OBJECTS - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for fence objects" + depends on DRM_I915 + select DEBUG_OBJECTS + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SW_FENCE_CHECK_DAG - bool "Enable additional driver debugging for detecting dependency cycles" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will catch some internal issues. + bool "Enable additional driver debugging for detecting dependency cycles" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_DEBUG_GUC - bool "Enable additional driver debugging for GuC" - depends on DRM_I915 - default n - help - Choose this option to turn on extra driver debugging that may affect - performance but will help resolve GuC related issues. + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. - Recommended for driver developers only. + Recommended for driver developers only. - If in doubt, say "N". + If in doubt, say "N". config DRM_I915_SELFTEST bool "Enable selftests upon driver load" @@ -178,15 +178,15 @@ config DRM_I915_SELFTEST_BROKEN If in doubt, say "N". config DRM_I915_LOW_LEVEL_TRACEPOINTS - bool "Enable low level request tracing events" - depends on DRM_I915 - default n - help - Choose this option to turn on low level request tracing events. - This provides the ability to precisely monitor engine utilisation - and also analyze the request dependency resolving timeline. - - If in doubt, say "N". + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. 
+ This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". config DRM_I915_DEBUG_VBLANK_EVADE bool "Enable extra debug warnings for vblank evasion" diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 2587ea834f06..a16a2daef977 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -46,10 +46,12 @@ i915-y += i915_drv.o \ i915_pci.o \ i915_scatterlist.o \ i915_suspend.o \ + i915_switcheroo.o \ i915_sysfs.o \ i915_utils.o \ intel_csr.o \ intel_device_info.o \ + intel_memory_region.o \ intel_pch.o \ intel_pm.o \ intel_runtime_pm.o \ @@ -83,8 +85,11 @@ gt-y += \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ + gt/intel_gt_requests.o \ gt/intel_hangcheck.o \ + gt/intel_llc.o \ gt/intel_lrc.o \ + gt/intel_rc6.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ gt/intel_ringbuffer.o \ @@ -118,6 +123,7 @@ gem-y += \ gem/i915_gem_pages.o \ gem/i915_gem_phys.o \ gem/i915_gem_pm.o \ + gem/i915_gem_region.o \ gem/i915_gem_shmem.o \ gem/i915_gem_shrinker.o \ gem/i915_gem_stolen.o \ @@ -172,6 +178,7 @@ i915-y += \ display/intel_display_power.o \ display/intel_dpio_phy.o \ display/intel_dpll_mgr.o \ + display/intel_dsb.o \ display/intel_fbc.o \ display/intel_fifo_underrun.o \ display/intel_frontbuffer.o \ @@ -182,7 +189,8 @@ i915-y += \ display/intel_psr.o \ display/intel_quirks.o \ display/intel_sprite.o \ - display/intel_tc.o + display/intel_tc.o \ + display/intel_vga.o i915-$(CONFIG_ACPI) += \ display/intel_acpi.o \ display/intel_opregion.o diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index d3fb75bb9eb1..c5a552a69752 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -199,7 +199,6 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->disable_cxsr = false; crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; - crtc_state->fb_changed = false; crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; @@ -264,10 +263,13 @@ static void intel_atomic_setup_scaler(struct intel_crtc_scaler_state *scaler_sta */ mode = PS_SCALER_MODE_NORMAL; } else { + struct intel_plane *linked = + plane_state->planar_linked_plane; + mode = PS_SCALER_MODE_PLANAR; - if (plane_state->linked_plane) - mode |= PS_PLANE_Y_SEL(plane_state->linked_plane->id); + if (linked) + mode |= PS_PLANE_Y_SEL(linked->id); } } else if (INTEL_GEN(dev_priv) > 9 || IS_GEMINILAKE(dev_priv)) { mode = PS_SCALER_MODE_NORMAL; @@ -371,6 +373,15 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, */ if (!plane) { struct drm_plane_state *state; + + /* + * GLK+ scalers don't have a HQ mode so it + * isn't necessary to change between HQ and dyn mode + * on those platforms. + */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + continue; + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { @@ -378,13 +389,6 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, plane->base.id); return PTR_ERR(state); } - - /* - * the plane is added after plane checks are run, - * but since this plane is unchanged just do the - * minimum required validation. 
- */ - crtc_state->base.planes_changed = true; } intel_plane = to_intel_plane(plane); diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index d1fcdf206da4..a6cff5a160fb 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -144,6 +144,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_plane_state *new_plane_state) { struct intel_plane *plane = to_intel_plane(new_plane_state->base.plane); + const struct drm_framebuffer *fb = new_plane_state->base.fb; int ret; new_crtc_state->active_planes &= ~BIT(plane->id); @@ -164,11 +165,11 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ new_crtc_state->active_planes |= BIT(plane->id); if (new_plane_state->base.visible && - is_planar_yuv_format(new_plane_state->base.fb->format->format)) + drm_format_info_is_yuv_semiplanar(fb->format)) new_crtc_state->nv12_planes |= BIT(plane->id); if (new_plane_state->base.visible && - new_plane_state->base.fb->format->format == DRM_FORMAT_C8) + fb->format->format == DRM_FORMAT_C8) new_crtc_state->c8_planes |= BIT(plane->id); if (new_plane_state->base.visible || old_plane_state->base.visible) @@ -194,14 +195,11 @@ get_crtc_from_states(const struct intel_plane_state *old_plane_state, return NULL; } -static int intel_plane_atomic_check(struct drm_plane *_plane, - struct drm_plane_state *_new_plane_state) +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane) { - struct intel_plane *plane = to_intel_plane(_plane); - struct intel_atomic_state *state = - to_intel_atomic_state(_new_plane_state->state); struct intel_plane_state *new_plane_state = - to_intel_plane_state(_new_plane_state); + intel_atomic_get_new_plane_state(state, plane); const struct intel_plane_state *old_plane_state = intel_atomic_get_old_plane_state(state, plane); struct intel_crtc *crtc = @@ -320,9 +318,9 @@ void skl_update_planes_on_crtc(struct intel_atomic_state *state, if (new_plane_state->base.visible) { intel_update_plane(plane, new_crtc_state, new_plane_state); - } else if (new_plane_state->slave) { + } else if (new_plane_state->planar_slave) { struct intel_plane *master = - new_plane_state->linked_plane; + new_plane_state->planar_linked_plane; /* * We update the slave plane from this function because @@ -368,5 +366,4 @@ void i9xx_update_planes_on_crtc(struct intel_atomic_state *state, const struct drm_plane_helper_funcs intel_plane_helper_funcs = { .prepare_fb = intel_prepare_plane_fb, .cleanup_fb = intel_cleanup_plane_fb, - .atomic_check = intel_plane_atomic_check, }; diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h index cb7ef4f9eafd..dc85af02e9b7 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h @@ -41,6 +41,8 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, struct intel_plane_state *intel_state); +int intel_plane_atomic_check(struct intel_atomic_state *state, + struct intel_plane *plane); int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *crtc_state, const struct intel_plane_state *old_plane_state, diff --git a/drivers/gpu/drm/i915/display/intel_audio.c 
b/drivers/gpu/drm/i915/display/intel_audio.c index ddcccf4408c3..ed18511befa3 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -560,8 +560,9 @@ static void ilk_audio_codec_disable(struct intel_encoder *encoder, u32 tmp, eldv; i915_reg_t aud_config, aud_cntrl_st2; - DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n", - port_name(port), pipe_name(pipe)); + DRM_DEBUG_KMS("Disable audio codec on [ENCODER:%d:%s], pipe %c\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe)); if (WARN_ON(port == PORT_A)) return; @@ -609,8 +610,9 @@ static void ilk_audio_codec_enable(struct intel_encoder *encoder, int len, i; i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2; - DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n", - port_name(port), pipe_name(pipe), drm_eld_size(eld)); + DRM_DEBUG_KMS("Enable audio codec on [ENCODER:%d:%s], pipe %c, %u bytes ELD\n", + encoder->base.base.id, encoder->base.name, + pipe_name(pipe), drm_eld_size(eld)); if (WARN_ON(port == PORT_A)) return; @@ -850,11 +852,23 @@ static unsigned long i915_audio_component_get_power(struct device *kdev) ret = intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - /* Force CDCLK to 2*BCLK as long as we need audio to be powered. */ - if (dev_priv->audio_power_refcount++ == 0) - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (dev_priv->audio_power_refcount++ == 0) { + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + I915_WRITE(AUD_FREQ_CNTRL, dev_priv->audio_freq_cntrl); + DRM_DEBUG_KMS("restored AUD_FREQ_CNTRL to 0x%x\n", + dev_priv->audio_freq_cntrl); + } + + /* Force CDCLK to 2*BCLK as long as we need audio powered. */ + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, true); + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + I915_WRITE(AUD_PIN_BUF_CTL, + (I915_READ(AUD_PIN_BUF_CTL) | + AUD_PIN_BUF_ENABLE)); + } + return ret; } @@ -865,7 +879,7 @@ static void i915_audio_component_put_power(struct device *kdev, /* Stop forcing CDCLK to 2*BCLK if no need for audio to be powered. */ if (--dev_priv->audio_power_refcount == 0) - if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) glk_force_audio_cdclk(dev_priv, false); intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO, cookie); @@ -1114,6 +1128,12 @@ static void i915_audio_component_init(struct drm_i915_private *dev_priv) return; } + if (IS_TIGERLAKE(dev_priv) || IS_ICELAKE(dev_priv)) { + dev_priv->audio_freq_cntrl = I915_READ(AUD_FREQ_CNTRL); + DRM_DEBUG_KMS("init value of AUD_FREQ_CNTRL of 0x%x\n", + dev_priv->audio_freq_cntrl); + } + dev_priv->audio_component_registered = true; } diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index efb39f350b19..63c1bd4c2954 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " "disabling port %c DVI/HDMI support\n", port_name(port), info->alternate_ddc_pin, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedly sharing the @@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, * port. 
Otherwise they share the same ddc bin and * system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dvi = false; info->supports_hdmi = false; info->alternate_ddc_pin = 0; @@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " "disabling port %c DP support\n", port_name(port), info->alternate_aux_channel, - port_name(p), port_name(port)); + port_name(p), port_name(p)); /* * If we have multiple ports supposedlt sharing the @@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv, * port. Otherwise they share the same aux channel * and system couldn't communicate with them separately. * - * Give child device order the priority, first come first - * served. + * Give inverse child device order the priority, + * last one wins. Yes, there are real machines + * (eg. Asrock B250M-HDV) where VBT has both + * port A and port E with the same AUX ch and + * we must pick port E :( */ + info = &dev_priv->vbt.ddi_port_info[p]; + info->supports_dp = false; info->alternate_aux_channel = 0; } @@ -1389,6 +1399,7 @@ static enum port dvo_port_to_port(u8 dvo_port) [PORT_D] = { DVO_PORT_HDMID, DVO_PORT_DPD, -1}, [PORT_E] = { DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, [PORT_F] = { DVO_PORT_HDMIF, DVO_PORT_DPF, -1}, + [PORT_G] = { DVO_PORT_HDMIG, DVO_PORT_DPG, -1}, }; enum port port; int i; @@ -1615,7 +1626,7 @@ parse_general_definitions(struct drm_i915_private *dev_priv, expected_size = 37; } else if (bdb->version <= 215) { expected_size = 38; - } else if (bdb->version <= 216) { + } else if (bdb->version <= 229) { expected_size = 39; } else { expected_size = sizeof(*child); @@ -1833,7 +1844,7 @@ void intel_bios_init(struct drm_i915_private *dev_priv) const struct bdb_header *bdb; u8 __iomem *bios = NULL; - if (!HAS_DISPLAY(dev_priv)) { + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) { DRM_DEBUG_KMS("Skipping VBT init due to disabled display.\n"); return; } @@ -2248,6 +2259,9 @@ enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, case DP_AUX_F: aux_ch = AUX_CH_F; break; + case DP_AUX_G: + aux_ch = AUX_CH_G; + break; default: MISSING_CASE(info->alternate_aux_channel); aux_ch = AUX_CH_A; diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h index 4969189e620f..98f064828a57 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.h +++ b/drivers/gpu/drm/i915/display/intel_bios.h @@ -1,5 +1,5 @@ /* - * Copyright © 2016 Intel Corporation + * Copyright © 2016-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +35,7 @@ #include <drm/i915_drm.h> struct drm_i915_private; +enum port; enum intel_backlight_type { INTEL_BACKLIGHT_PMIC, diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 688858ebe4d0..22e83f857de8 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -35,28 +35,54 @@ static int icl_pcode_read_mem_global_info(struct drm_i915_private 
*dev_priv, if (ret) return ret; - switch (val & 0xf) { - case 0: - qi->dram_type = INTEL_DRAM_DDR4; - break; - case 1: - qi->dram_type = INTEL_DRAM_DDR3; - break; - case 2: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - case 3: - qi->dram_type = INTEL_DRAM_LPDDR3; - break; - default: - MISSING_CASE(val & 0xf); - break; + if (IS_GEN(dev_priv, 12)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + case 4: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 5: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else if (IS_GEN(dev_priv, 11)) { + switch (val & 0xf) { + case 0: + qi->dram_type = INTEL_DRAM_DDR4; + break; + case 1: + qi->dram_type = INTEL_DRAM_DDR3; + break; + case 2: + qi->dram_type = INTEL_DRAM_LPDDR3; + break; + case 3: + qi->dram_type = INTEL_DRAM_LPDDR4; + break; + default: + MISSING_CASE(val & 0xf); + break; + } + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + qi->dram_type = INTEL_DRAM_LPDDR3; /* Conservative default */ } qi->num_channels = (val & 0xf0) >> 4; qi->num_points = (val & 0xf00) >> 8; - qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8; + if (IS_GEN(dev_priv, 12)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 16; + else if (IS_GEN(dev_priv, 11)) + qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8; return 0; } @@ -132,20 +158,25 @@ static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) } struct intel_sa_info { - u8 deburst, mpagesize, deprogbwlimit, displayrtids; + u16 displayrtids; + u8 deburst, deprogbwlimit; }; static const struct intel_sa_info icl_sa_info = { .deburst = 8, - .mpagesize = 16, .deprogbwlimit = 25, /* GB/s */ .displayrtids = 128, }; -static int icl_get_bw_info(struct drm_i915_private *dev_priv) +static const struct intel_sa_info tgl_sa_info = { + .deburst = 16, + .deprogbwlimit = 34, /* GB/s */ + .displayrtids = 256, +}; + +static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; - const struct intel_sa_info *sa = &icl_sa_info; bool is_y_tile = true; /* assume y tile may be used */ int num_channels; int deinterleave; @@ -233,14 +264,16 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, void intel_bw_init_hw(struct drm_i915_private *dev_priv) { - if (IS_GEN(dev_priv, 11)) - icl_get_bw_info(dev_priv); + if (IS_GEN(dev_priv, 12)) + icl_get_bw_info(dev_priv, &tgl_sa_info); + else if (IS_GEN(dev_priv, 11)) + icl_get_bw_info(dev_priv, &icl_sa_info); } static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv, int num_planes) { - if (IS_GEN(dev_priv, 11)) + if (INTEL_GEN(dev_priv) >= 11) /* * FIXME with SAGV disabled maybe we can assume * point 1 will always be used? Seems to match diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index d0bc42e5039c..3d867963a6d1 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -21,6 +21,7 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include "intel_atomic.h" #include "intel_cdclk.h" #include "intel_display_types.h" #include "intel_sideband.h" @@ -1161,28 +1162,88 @@ static void skl_uninit_cdclk(struct drm_i915_private *dev_priv) skl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static int bxt_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 576000) - return 624000; - else if (min_cdclk > 384000) - return 576000; - else if (min_cdclk > 288000) - return 384000; - else if (min_cdclk > 144000) - return 288000; - else - return 144000; +static const struct intel_cdclk_vals bxt_cdclk_table[] = { + { .refclk = 19200, .cdclk = 144000, .divider = 8, .ratio = 60 }, + { .refclk = 19200, .cdclk = 288000, .divider = 4, .ratio = 60 }, + { .refclk = 19200, .cdclk = 384000, .divider = 3, .ratio = 60 }, + { .refclk = 19200, .cdclk = 576000, .divider = 2, .ratio = 60 }, + { .refclk = 19200, .cdclk = 624000, .divider = 2, .ratio = 65 }, + {} +}; + +static const struct intel_cdclk_vals glk_cdclk_table[] = { + { .refclk = 19200, .cdclk = 79200, .divider = 8, .ratio = 33 }, + { .refclk = 19200, .cdclk = 158400, .divider = 4, .ratio = 33 }, + { .refclk = 19200, .cdclk = 316800, .divider = 2, .ratio = 33 }, + {} +}; + +static const struct intel_cdclk_vals cnl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 168000, .divider = 4, .ratio = 35 }, + { .refclk = 19200, .cdclk = 336000, .divider = 2, .ratio = 35 }, + { .refclk = 19200, .cdclk = 528000, .divider = 2, .ratio = 55 }, + + { .refclk = 24000, .cdclk = 168000, .divider = 4, .ratio = 28 }, + { .refclk = 24000, .cdclk = 336000, .divider = 2, .ratio = 28 }, + { .refclk = 24000, .cdclk = 528000, .divider = 2, .ratio = 44 }, + {} +}; + +static const struct intel_cdclk_vals icl_cdclk_table[] = { + { .refclk = 19200, .cdclk = 172800, .divider = 2, .ratio = 18 }, + { .refclk = 19200, .cdclk = 192000, .divider = 2, .ratio = 20 }, + { .refclk = 19200, .cdclk = 307200, .divider = 2, .ratio = 32 }, + { .refclk = 19200, .cdclk = 326400, .divider = 4, .ratio = 68 }, + { .refclk = 19200, .cdclk = 556800, .divider = 2, .ratio = 58 }, + { .refclk = 19200, .cdclk = 652800, .divider = 2, .ratio = 68 }, + + { .refclk = 24000, .cdclk = 180000, .divider = 2, .ratio = 15 }, + { .refclk = 24000, .cdclk = 192000, .divider = 2, .ratio = 16 }, + { .refclk = 24000, .cdclk = 312000, .divider = 2, .ratio = 26 }, + { .refclk = 24000, .cdclk = 324000, .divider = 4, .ratio = 54 }, + { .refclk = 24000, .cdclk = 552000, .divider = 2, .ratio = 46 }, + { .refclk = 24000, .cdclk = 648000, .divider = 2, .ratio = 54 }, + + { .refclk = 38400, .cdclk = 172800, .divider = 2, .ratio = 9 }, + { .refclk = 38400, .cdclk = 192000, .divider = 2, .ratio = 10 }, + { .refclk = 38400, .cdclk = 307200, .divider = 2, .ratio = 16 }, + { .refclk = 38400, .cdclk = 326400, .divider = 4, .ratio = 34 }, + { .refclk = 38400, .cdclk = 556800, .divider = 2, .ratio = 29 }, + { .refclk = 38400, .cdclk = 652800, .divider = 2, .ratio = 34 }, + {} +}; + +static int bxt_calc_cdclk(struct drm_i915_private *dev_priv, int min_cdclk) +{ + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk >= min_cdclk) + return table[i].cdclk; + + WARN(1, "Cannot satisfy minimum cdclk %d with refclk %u\n", + min_cdclk, dev_priv->cdclk.hw.ref); + return 0; } -static int glk_calc_cdclk(int min_cdclk) +static int bxt_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { - if (min_cdclk > 158400) - return 316800; - else if (min_cdclk > 
79200) - return 158400; - else - return 79200; + const struct intel_cdclk_vals *table = dev_priv->cdclk.table; + int i; + + if (cdclk == dev_priv->cdclk.hw.bypass) + return 0; + + for (i = 0; table[i].refclk; i++) + if (table[i].refclk == dev_priv->cdclk.hw.ref && + table[i].cdclk == cdclk) + return dev_priv->cdclk.hw.ref * table[i].ratio; + + WARN(1, "cdclk %d not valid for refclk %u\n", + cdclk, dev_priv->cdclk.hw.ref); + return 0; } static u8 bxt_calc_voltage_level(int cdclk) @@ -1190,69 +1251,99 @@ static u8 bxt_calc_voltage_level(int cdclk) return DIV_ROUND_UP(cdclk, 25000); } -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +static u8 cnl_calc_voltage_level(int cdclk) { - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) + if (cdclk > 336000) + return 2; + else if (cdclk > 168000) + return 1; + else return 0; +} - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } +static u8 icl_calc_voltage_level(int cdclk) +{ + if (cdclk > 556800) + return 2; + else if (cdclk > 312000) + return 1; + else + return 0; +} - return dev_priv->cdclk.hw.ref * ratio; +static u8 ehl_calc_voltage_level(int cdclk) +{ + if (cdclk > 312000) + return 2; + else if (cdclk > 180000) + return 1; + else + return 0; } -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +static void cnl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - int ratio; + if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) + cdclk_state->ref = 24000; + else + cdclk_state->ref = 19200; +} - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; +static void icl_readout_refclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) +{ + u32 dssm = I915_READ(SKL_DSSM) & ICL_DSSM_CDCLK_PLL_REFCLK_MASK; - switch (cdclk) { + switch (dssm) { default: - MISSING_CASE(cdclk); + MISSING_CASE(dssm); /* fall through */ - case 79200: - case 158400: - case 316800: - ratio = 33; + case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: + cdclk_state->ref = 24000; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: + cdclk_state->ref = 19200; + break; + case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: + cdclk_state->ref = 38400; break; } - - return dev_priv->cdclk.hw.ref * ratio; } -static void bxt_de_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) +static void bxt_de_pll_readout(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - u32 val; + u32 val, ratio; - cdclk_state->ref = 19200; - cdclk_state->vco = 0; + if (INTEL_GEN(dev_priv) >= 11) + icl_readout_refclk(dev_priv, cdclk_state); + else if (IS_CANNONLAKE(dev_priv)) + cnl_readout_refclk(dev_priv, cdclk_state); + else + cdclk_state->ref = 19200; val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || + (val & BXT_DE_PLL_LOCK) == 0) { + /* + * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but + * setting it to zero is a way to signal that. + */ + cdclk_state->vco = 0; return; + } - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; + /* + * CNL+ have the ratio directly in the PLL enable register, gen9lp had + * it in a separate PLL control register. 
+ */ + if (INTEL_GEN(dev_priv) >= 10) + ratio = val & CNL_CDCLK_PLL_RATIO_MASK; + else + ratio = I915_READ(BXT_DE_PLL_CTL) & BXT_DE_PLL_RATIO_MASK; - val = I915_READ(BXT_DE_PLL_CTL); - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; + cdclk_state->vco = ratio * cdclk_state->ref; } static void bxt_get_cdclk(struct drm_i915_private *dev_priv, @@ -1261,12 +1352,19 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, u32 divider; int div; - bxt_de_pll_update(dev_priv, cdclk_state); + bxt_de_pll_readout(dev_priv, cdclk_state); - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; + if (INTEL_GEN(dev_priv) >= 12) + cdclk_state->bypass = cdclk_state->ref / 2; + else if (INTEL_GEN(dev_priv) >= 11) + cdclk_state->bypass = 50000; + else + cdclk_state->bypass = cdclk_state->ref; - if (cdclk_state->vco == 0) + if (cdclk_state->vco == 0) { + cdclk_state->cdclk = cdclk_state->bypass; goto out; + } divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1275,13 +1373,15 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, div = 2; break; case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); div = 3; break; case BXT_CDCLK_CD2X_DIV_SEL_2: div = 4; break; case BXT_CDCLK_CD2X_DIV_SEL_4: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); div = 8; break; default: @@ -1297,7 +1397,7 @@ static void bxt_get_cdclk(struct drm_i915_private *dev_priv, * at least what the CDCLK frequency requires. */ cdclk_state->voltage_level = - bxt_calc_voltage_level(cdclk_state->cdclk); + dev_priv->display.calc_voltage_level(cdclk_state->cdclk); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1332,259 +1432,6 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - int cdclk = cdclk_state->cdclk; - int vco = cdclk_state->vco; - u32 val, divider; - int ret; - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - default: - WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); - WARN_ON(vco != 0); - /* fall through */ - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - } - - /* - * Inform power controller of upcoming frequency change. BSpec - * requires us to wait up to 150usec, but that leads to timeouts; - * the 2ms used here is based on experiment. - */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000, 150, 2); - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. 
- */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); - - /* - * The timeout isn't specified, the 2ms used here is based on - * experiment. - * FIXME: Waiting for the request completion could be delayed until - * the next PCODE request based on BSpec. - */ - ret = sandybridge_pcode_write_timeout(dev_priv, - HSW_PCODE_DE_WRITE_FREQ_REQ, - cdclk_state->voltage_level, 150, 2); - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - if (dev_priv->cdclk.hw.vco == 0 || - dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk.hw.cdclk >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk.hw.cdclk = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk.hw.vco = -1; -} - -static void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk.hw.cdclk != 0 && - dev_priv->cdclk.hw.vco != 0) - return; - - cdclk_state = dev_priv->cdclk.hw; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. 
- */ - if (IS_GEMINILAKE(dev_priv)) { - cdclk_state.cdclk = glk_calc_cdclk(0); - cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); - } else { - cdclk_state.cdclk = bxt_calc_cdclk(0); - cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); - } - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - -static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - cdclk_state.voltage_level = bxt_calc_voltage_level(cdclk_state.cdclk); - - bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); -} - -static int cnl_calc_cdclk(int min_cdclk) -{ - if (min_cdclk > 336000) - return 528000; - else if (min_cdclk > 168000) - return 336000; - else - return 168000; -} - -static u8 cnl_calc_voltage_level(int cdclk) -{ - if (cdclk > 336000) - return 2; - else if (cdclk > 168000) - return 1; - else - return 0; -} - -static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - - if (I915_READ(SKL_DSSM) & CNL_DSSM_CDCLK_PLL_REFCLK_24MHz) - cdclk_state->ref = 24000; - else - cdclk_state->ref = 19200; - - cdclk_state->vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - cdclk_state->vco = (val & CNL_CDCLK_PLL_RATIO_MASK) * cdclk_state->ref; -} - -static void cnl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 divider; - int div; - - cnl_cdclk_pll_update(dev_priv, cdclk_state); - - cdclk_state->cdclk = cdclk_state->bypass = cdclk_state->ref; - - if (cdclk_state->vco == 0) - goto out; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - default: - MISSING_CASE(divider); - return; - } - - cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); - - out: - /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. 
- */ - cdclk_state->voltage_level = - cnl_calc_voltage_level(cdclk_state->cdclk); -} - static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) { u32 val; @@ -1618,7 +1465,27 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk.hw.vco = vco; } -static void cnl_set_cdclk(struct drm_i915_private *dev_priv, +static u32 bxt_cdclk_cd2x_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) +{ + if (INTEL_GEN(dev_priv) >= 12) { + if (pipe == INVALID_PIPE) + return TGL_CDCLK_CD2X_PIPE_NONE; + else + return TGL_CDCLK_CD2X_PIPE(pipe); + } else if (INTEL_GEN(dev_priv) >= 11) { + if (pipe == INVALID_PIPE) + return ICL_CDCLK_CD2X_PIPE_NONE; + else + return ICL_CDCLK_CD2X_PIPE(pipe); + } else { + if (pipe == INVALID_PIPE) + return BXT_CDCLK_CD2X_PIPE_NONE; + else + return BXT_CDCLK_CD2X_PIPE(pipe); + } +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, const struct intel_cdclk_state *cdclk_state, enum pipe pipe) { @@ -1627,17 +1494,28 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); + /* Inform power controller of upcoming frequency change. */ + if (INTEL_GEN(dev_priv) >= 10) + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + else + /* + * BSpec requires us to wait up to 150usec, but that leads to + * timeouts; the 2ms used here is based on experiment. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000, 150, 2); + if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); + DRM_ERROR("Failed to inform PCU about cdclk change (err %d, freq %d)\n", + ret, cdclk); return; } - /* cdclk = vco / 2 / div{1,2} */ + /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { default: WARN_ON(cdclk != dev_priv->cdclk.hw.bypass); @@ -1646,67 +1524,87 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, case 2: divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; + case 3: + WARN(IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 10, + "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; case 4: divider = BXT_CDCLK_CD2X_DIV_SEL_2; break; + case 8: + WARN(INTEL_GEN(dev_priv) >= 10, "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; } - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); + if (INTEL_GEN(dev_priv) >= 10) { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_disable(dev_priv); - val = divider | skl_cdclk_decimal(cdclk); - if (pipe == INVALID_PIPE) - val |= BXT_CDCLK_CD2X_PIPE_NONE; - else - val |= BXT_CDCLK_CD2X_PIPE(pipe); - I915_WRITE(CDCLK_CTL, val); + if (dev_priv->cdclk.hw.vco != vco) + cnl_cdclk_pll_enable(dev_priv, vco); - if (pipe != INVALID_PIPE) - intel_wait_for_vblank(dev_priv, pipe); + } else { + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_disable(dev_priv); - /* inform PCU of the change */ - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); + if (dev_priv->cdclk.hw.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + } - intel_update_cdclk(dev_priv); + 
val = divider | skl_cdclk_decimal(cdclk) | + bxt_cdclk_cd2x_pipe(dev_priv, pipe); /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} + if (IS_GEN9_LP(dev_priv) && cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); -static int cnl_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; + if (pipe != INVALID_PIPE) + intel_wait_for_vblank(dev_priv, pipe); - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; + if (INTEL_GEN(dev_priv) >= 10) { + ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, + cdclk_state->voltage_level); + } else { + /* + * The timeout isn't specified, the 2ms used here is based on + * experiment. + * FIXME: Waiting for the request completion could be delayed + * until the next PCODE request based on BSpec. + */ + ret = sandybridge_pcode_write_timeout(dev_priv, + HSW_PCODE_DE_WRITE_FREQ_REQ, + cdclk_state->voltage_level, + 150, 2); + } - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 168000: - case 336000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 35 : 28; - break; - case 528000: - ratio = dev_priv->cdclk.hw.ref == 19200 ? 55 : 44; - break; + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; } - return dev_priv->cdclk.hw.ref * ratio; + intel_update_cdclk(dev_priv); + + if (INTEL_GEN(dev_priv) >= 10) + /* + * Can't read out the voltage level :( + * Let's just assume everything is as expected. + */ + dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; } -static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; + int cdclk, vco; intel_update_cdclk(dev_priv); intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); @@ -1727,262 +1625,65 @@ static void cnl_sanitize_cdclk(struct drm_i915_private *dev_priv) * dividers both synching to an active pipe, or asynchronously * (PIPE_NONE). 
*/ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; + cdctl &= ~bxt_cdclk_cd2x_pipe(dev_priv, INVALID_PIPE); -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk.hw.cdclk = 0; + /* Make sure this is a legal cdclk value for the platform */ + cdclk = bxt_calc_cdclk(dev_priv, dev_priv->cdclk.hw.cdclk); + if (cdclk != dev_priv->cdclk.hw.cdclk) + goto sanitize; - /* force full PLL disable + enable */ - dev_priv->cdclk.hw.vco = -1; -} + /* Make sure the VCO is correct for the cdclk */ + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); + if (vco != dev_priv->cdclk.hw.vco) + goto sanitize; -static int icl_calc_cdclk(int min_cdclk, unsigned int ref) -{ - static const int ranges_24[] = { 180000, 192000, 312000, 552000, 648000 }; - static const int ranges_19_38[] = { 172800, 192000, 307200, 556800, 652800 }; - const int *ranges; - int len, i; + expected = skl_cdclk_decimal(cdclk); - switch (ref) { - default: - MISSING_CASE(ref); - /* fall through */ - case 24000: - ranges = ranges_24; - len = ARRAY_SIZE(ranges_24); - break; - case 19200: - case 38400: - ranges = ranges_19_38; - len = ARRAY_SIZE(ranges_19_38); + /* Figure out what CD2X divider we should be using for this cdclk */ + switch (DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.cdclk)) { + case 2: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1; break; - } - - for (i = 0; i < len; i++) { - if (min_cdclk <= ranges[i]) - return ranges[i]; - } - - WARN_ON(min_cdclk > ranges[len - 1]); - return ranges[len - 1]; -} - -static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk.hw.bypass) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - /* fall through */ - case 172800: - case 307200: - case 556800: - case 652800: - WARN_ON(dev_priv->cdclk.hw.ref != 19200 && - dev_priv->cdclk.hw.ref != 38400); + case 3: + expected |= BXT_CDCLK_CD2X_DIV_SEL_1_5; break; - case 180000: - case 312000: - case 552000: - case 648000: - WARN_ON(dev_priv->cdclk.hw.ref != 24000); + case 4: + expected |= BXT_CDCLK_CD2X_DIV_SEL_2; break; - case 192000: - WARN_ON(dev_priv->cdclk.hw.ref != 19200 && - dev_priv->cdclk.hw.ref != 38400 && - dev_priv->cdclk.hw.ref != 24000); + case 8: + expected |= BXT_CDCLK_CD2X_DIV_SEL_4; break; - } - - ratio = cdclk / (dev_priv->cdclk.hw.ref / 2); - - return dev_priv->cdclk.hw.ref * ratio; -} - -static void icl_set_cdclk(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *cdclk_state, - enum pipe pipe) -{ - unsigned int cdclk = cdclk_state->cdclk; - unsigned int vco = cdclk_state->vco; - int ret; - - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - if (dev_priv->cdclk.hw.vco != 0 && - dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_disable(dev_priv); - - if (dev_priv->cdclk.hw.vco != vco) - cnl_cdclk_pll_enable(dev_priv, vco); - - /* - * On ICL CD2X_DIV can only be 1, so we'll never end up changing the - * divider here synchronized to a pipe while CDCLK is on, nor will we - * need the corresponding vblank wait. 
- */ - I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | - skl_cdclk_decimal(cdclk)); - - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, - cdclk_state->voltage_level); - - intel_update_cdclk(dev_priv); - - /* - * Can't read out the voltage level :( - * Let's just assume everything is as expected. - */ - dev_priv->cdclk.hw.voltage_level = cdclk_state->voltage_level; -} - -static u8 icl_calc_voltage_level(struct drm_i915_private *dev_priv, int cdclk) -{ - if (IS_ELKHARTLAKE(dev_priv)) { - if (cdclk > 312000) - return 2; - else if (cdclk > 180000) - return 1; - else - return 0; - } else { - if (cdclk > 556800) - return 2; - else if (cdclk > 312000) - return 1; - else - return 0; - } -} - -static void icl_get_cdclk(struct drm_i915_private *dev_priv, - struct intel_cdclk_state *cdclk_state) -{ - u32 val; - - cdclk_state->bypass = 50000; - - val = I915_READ(SKL_DSSM); - switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { default: - MISSING_CASE(val); - /* fall through */ - case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: - cdclk_state->ref = 24000; - break; - case ICL_DSSM_CDCLK_PLL_REFCLK_19_2MHz: - cdclk_state->ref = 19200; - break; - case ICL_DSSM_CDCLK_PLL_REFCLK_38_4MHz: - cdclk_state->ref = 38400; - break; - } - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0 || - (val & BXT_DE_PLL_LOCK) == 0) { - /* - * CDCLK PLL is disabled, the VCO/ratio doesn't matter, but - * setting it to zero is a way to signal that. - */ - cdclk_state->vco = 0; - cdclk_state->cdclk = cdclk_state->bypass; - goto out; + goto sanitize; } - cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; - - val = I915_READ(CDCLK_CTL); - WARN_ON((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0); - - cdclk_state->cdclk = cdclk_state->vco / 2; - -out: /* - * Can't read this out :( Let's assume it's - * at least what the CDCLK frequency requires. + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. */ - cdclk_state->voltage_level = - icl_calc_voltage_level(dev_priv, cdclk_state->cdclk); -} - -static void icl_init_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state sanitized_state; - u32 val; - - /* This sets dev_priv->cdclk.hw. */ - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - - /* This means CDCLK disabled. 
*/ - if (dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.bypass) - goto sanitize; - - val = I915_READ(CDCLK_CTL); - - if ((val & BXT_CDCLK_CD2X_DIV_SEL_MASK) != 0) - goto sanitize; - - if ((val & CDCLK_FREQ_DECIMAL_MASK) != - skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk)) - goto sanitize; + if (IS_GEN9_LP(dev_priv) && dev_priv->cdclk.hw.cdclk >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - return; + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; sanitize: DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - sanitized_state.ref = dev_priv->cdclk.hw.ref; - sanitized_state.cdclk = icl_calc_cdclk(0, sanitized_state.ref); - sanitized_state.vco = icl_calc_cdclk_pll_vco(dev_priv, - sanitized_state.cdclk); - sanitized_state.voltage_level = - icl_calc_voltage_level(dev_priv, - sanitized_state.cdclk); - - icl_set_cdclk(dev_priv, &sanitized_state, INVALID_PIPE); -} - -static void icl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; - - cdclk_state.cdclk = cdclk_state.bypass; - cdclk_state.vco = 0; - cdclk_state.voltage_level = icl_calc_voltage_level(dev_priv, - cdclk_state.cdclk); + /* force cdclk programming */ + dev_priv->cdclk.hw.cdclk = 0; - icl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + /* force full PLL disable + enable */ + dev_priv->cdclk.hw.vco = -1; } -static void cnl_init_cdclk(struct drm_i915_private *dev_priv) +static void bxt_init_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state; - cnl_sanitize_cdclk(dev_priv); + bxt_sanitize_cdclk(dev_priv); if (dev_priv->cdclk.hw.cdclk != 0 && dev_priv->cdclk.hw.vco != 0) @@ -1990,22 +1691,29 @@ static void cnl_init_cdclk(struct drm_i915_private *dev_priv) cdclk_state = dev_priv->cdclk.hw; - cdclk_state.cdclk = cnl_calc_cdclk(0); - cdclk_state.vco = cnl_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. 
+ */ + cdclk_state.cdclk = bxt_calc_cdclk(dev_priv, 0); + cdclk_state.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } -static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) +static void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; cdclk_state.cdclk = cdclk_state.bypass; cdclk_state.vco = 0; - cdclk_state.voltage_level = cnl_calc_voltage_level(cdclk_state.cdclk); + cdclk_state.voltage_level = + dev_priv->display.calc_voltage_level(cdclk_state.cdclk); - cnl_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); + bxt_set_cdclk(dev_priv, &cdclk_state, INVALID_PIPE); } /** @@ -2019,14 +1727,10 @@ static void cnl_uninit_cdclk(struct drm_i915_private *dev_priv) */ void intel_cdclk_init(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_init_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_init_cdclk(i915); + if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10) + bxt_init_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_init_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_init_cdclk(i915); } /** @@ -2038,14 +1742,10 @@ void intel_cdclk_init(struct drm_i915_private *i915) */ void intel_cdclk_uninit(struct drm_i915_private *i915) { - if (INTEL_GEN(i915) >= 11) - icl_uninit_cdclk(i915); - else if (IS_CANNONLAKE(i915)) - cnl_uninit_cdclk(i915); + if (INTEL_GEN(i915) >= 10 || IS_GEN9_LP(i915)) + bxt_uninit_cdclk(i915); else if (IS_GEN9_BC(i915)) skl_uninit_cdclk(i915); - else if (IS_GEN9_LP(i915)) - bxt_uninit_cdclk(i915); } /** @@ -2073,9 +1773,9 @@ bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, * Returns: * True if the CDCLK states require just a cd2x divider update, false if not. */ -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { /* Older hw doesn't have the capability */ if (INTEL_GEN(dev_priv) < 10 && !IS_GEN9_LP(dev_priv)) @@ -2094,8 +1794,8 @@ bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, * Returns: * True if the CDCLK states don't match, false if they do. 
*/ -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b) +static bool intel_cdclk_changed(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) { return intel_cdclk_needs_modeset(a, b) || a->voltage_level != b->voltage_level; @@ -2200,9 +1900,11 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, intel_set_cdclk(dev_priv, new_state, pipe); } -static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, - int pixel_rate) +static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int pixel_rate = crtc_state->pixel_rate; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) return DIV_ROUND_UP(pixel_rate, 2); else if (IS_GEN(dev_priv, 9) || @@ -2210,6 +1912,8 @@ static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv, return pixel_rate; else if (IS_CHERRYVIEW(dev_priv)) return DIV_ROUND_UP(pixel_rate * 100, 95); + else if (crtc_state->double_wide) + return DIV_ROUND_UP(pixel_rate * 100, 90 * 2); else return DIV_ROUND_UP(pixel_rate * 100, 90); } @@ -2223,7 +1927,7 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) if (!crtc_state->base.enable) return 0; - min_cdclk = intel_pixel_rate_to_cdclk(dev_priv, crtc_state->pixel_rate); + min_cdclk = intel_pixel_rate_to_cdclk(crtc_state); /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ if (IS_BROADWELL(dev_priv) && hsw_crtc_state_ips_capable(crtc_state)) @@ -2318,6 +2022,10 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) } /* + * Account for port clock min voltage level requirements. + * This only really does something on CNL+ but can be + * called on earlier platforms as well. + * * Note that this functions assumes that 0 is * the lowest voltage value, and higher values * correspond to increasingly higher voltages. @@ -2326,7 +2034,7 @@ static int intel_compute_min_cdclk(struct intel_atomic_state *state) * future platforms this code will need to be * adjusted. 
*/ -static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state) +static u8 bxt_compute_min_voltage_level(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; @@ -2369,7 +2077,7 @@ static int vlv_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = vlv_calc_voltage_level(dev_priv, cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = vlv_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2400,7 +2108,7 @@ static int bdw_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = bdw_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = bdw_calc_cdclk(state->cdclk.force_min_cdclk); state->cdclk.actual.cdclk = cdclk; @@ -2470,7 +2178,7 @@ static int skl_modeset_calc_cdclk(struct intel_atomic_state *state) state->cdclk.logical.voltage_level = skl_calc_voltage_level(cdclk); - if (!state->active_crtcs) { + if (!state->active_pipes) { cdclk = skl_calc_cdclk(state->cdclk.force_min_cdclk, vco); state->cdclk.actual.vco = vco; @@ -2493,32 +2201,23 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + cdclk = bxt_calc_cdclk(dev_priv, min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.logical.vco = vco; state->cdclk.logical.cdclk = cdclk; state->cdclk.logical.voltage_level = - bxt_calc_voltage_level(cdclk); - - if (!state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk); - vco = glk_de_pll_vco(dev_priv, cdclk); - } else { - cdclk = bxt_calc_cdclk(state->cdclk.force_min_cdclk); - vco = bxt_de_pll_vco(dev_priv, cdclk); - } + max(dev_priv->display.calc_voltage_level(cdclk), + bxt_compute_min_voltage_level(state)); + + if (!state->active_pipes) { + cdclk = bxt_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk); + vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk); state->cdclk.actual.vco = vco; state->cdclk.actual.cdclk = cdclk; state->cdclk.actual.voltage_level = - bxt_calc_voltage_level(cdclk); + dev_priv->display.calc_voltage_level(cdclk); } else { state->cdclk.actual = state->cdclk.logical; } @@ -2526,70 +2225,140 @@ static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state) return 0; } -static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state) +static int intel_lock_all_pipes(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - int min_cdclk, cdclk, vco; + struct intel_crtc *crtc; - min_cdclk = intel_compute_min_cdclk(state); - if (min_cdclk < 0) - return min_cdclk; + /* Add all pipes to the state */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; - cdclk = cnl_calc_cdclk(min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(cnl_calc_voltage_level(cdclk), - cnl_compute_min_voltage_level(state)); + return 0; +} - if (!state->active_crtcs) { - cdclk = 
cnl_calc_cdclk(state->cdclk.force_min_cdclk); - vco = cnl_cdclk_pll_vco(dev_priv, cdclk); +static int intel_modeset_all_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct intel_crtc *crtc; - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - cnl_calc_voltage_level(cdclk); - } else { - state->cdclk.actual = state->cdclk.logical; + /* + * Add all pipes to the state, and force + * a modeset on all the active ones. + */ + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; + int ret; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (!crtc_state->base.active || + drm_atomic_crtc_needs_modeset(&crtc_state->base)) + continue; + + crtc_state->base.mode_changed = true; + + ret = drm_atomic_add_affected_connectors(&state->base, + &crtc->base); + if (ret) + return ret; + + ret = drm_atomic_add_affected_planes(&state->base, + &crtc->base); + if (ret) + return ret; + + crtc_state->update_planes |= crtc_state->active_planes; } return 0; } -static int icl_modeset_calc_cdclk(struct intel_atomic_state *state) +static int fixed_modeset_calc_cdclk(struct intel_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - unsigned int ref = state->cdclk.logical.ref; - int min_cdclk, cdclk, vco; + int min_cdclk; + /* + * We can't change the cdclk frequency, but we still want to + * check that the required minimum frequency doesn't exceed + * the actual cdclk frequency. + */ min_cdclk = intel_compute_min_cdclk(state); if (min_cdclk < 0) return min_cdclk; - cdclk = icl_calc_cdclk(min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + return 0; +} - state->cdclk.logical.vco = vco; - state->cdclk.logical.cdclk = cdclk; - state->cdclk.logical.voltage_level = - max(icl_calc_voltage_level(dev_priv, cdclk), - cnl_compute_min_voltage_level(state)); +int intel_modeset_calc_cdclk(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + enum pipe pipe; + int ret; - if (!state->active_crtcs) { - cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref); - vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk); + ret = dev_priv->display.modeset_calc_cdclk(state); + if (ret) + return ret; - state->cdclk.actual.vco = vco; - state->cdclk.actual.cdclk = cdclk; - state->cdclk.actual.voltage_level = - icl_calc_voltage_level(dev_priv, cdclk); + /* + * Writes to dev_priv->cdclk.logical must protected by + * holding all the crtc locks, even if we don't end up + * touching the hardware + */ + if (intel_cdclk_changed(&dev_priv->cdclk.logical, + &state->cdclk.logical)) { + ret = intel_lock_all_pipes(state); + if (ret < 0) + return ret; + } + + if (is_power_of_2(state->active_pipes)) { + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + + pipe = ilog2(state->active_pipes); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); + crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + if (crtc_state && + drm_atomic_crtc_needs_modeset(&crtc_state->base)) + pipe = INVALID_PIPE; } else { - state->cdclk.actual = state->cdclk.logical; + pipe = INVALID_PIPE; + } + + /* All pipes must be switched off while we change the cdclk. 
*/ + if (pipe != INVALID_PIPE && + intel_cdclk_needs_cd2x_update(dev_priv, + &dev_priv->cdclk.actual, + &state->cdclk.actual)) { + ret = intel_lock_all_pipes(state); + if (ret) + return ret; + + state->cdclk.pipe = pipe; + } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, + &state->cdclk.actual)) { + ret = intel_modeset_all_pipes(state); + if (ret) + return ret; + + state->cdclk.pipe = INVALID_PIPE; } + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + state->cdclk.logical.cdclk, + state->cdclk.actual.cdclk); + DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", + state->cdclk.logical.voltage_level, + state->cdclk.actual.voltage_level); + return 0; } @@ -2809,15 +2578,29 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (INTEL_GEN(dev_priv) >= 11) { - dev_priv->display.set_cdclk = icl_set_cdclk; - dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk; + if (IS_ELKHARTLAKE(dev_priv)) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = ehl_calc_voltage_level; + dev_priv->cdclk.table = icl_cdclk_table; + } else if (INTEL_GEN(dev_priv) >= 11) { + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = icl_calc_voltage_level; + dev_priv->cdclk.table = icl_cdclk_table; } else if (IS_CANNONLAKE(dev_priv)) { - dev_priv->display.set_cdclk = cnl_set_cdclk; - dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; + dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = cnl_calc_voltage_level; + dev_priv->cdclk.table = cnl_cdclk_table; } else if (IS_GEN9_LP(dev_priv)) { dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; + dev_priv->display.calc_voltage_level = bxt_calc_voltage_level; + if (IS_GEMINILAKE(dev_priv)) + dev_priv->cdclk.table = glk_cdclk_table; + else + dev_priv->cdclk.table = bxt_cdclk_table; } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; @@ -2830,13 +2613,11 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv)) { dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; + } else { + dev_priv->display.modeset_calc_cdclk = fixed_modeset_calc_cdclk; } - if (INTEL_GEN(dev_priv) >= 11) - dev_priv->display.get_cdclk = icl_get_cdclk; - else if (IS_CANNONLAKE(dev_priv)) - dev_priv->display.get_cdclk = cnl_get_cdclk; - else if (IS_GEN9_LP(dev_priv)) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEN9_LP(dev_priv)) dev_priv->display.get_cdclk = bxt_get_cdclk; else if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_cdclk = skl_get_cdclk; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h index 4d6f7f5f8930..cf71394cc79c 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.h +++ b/drivers/gpu/drm/i915/display/intel_cdclk.h @@ -15,6 +15,13 @@ struct intel_atomic_state; struct intel_cdclk_state; struct intel_crtc_state; +struct intel_cdclk_vals { + u16 refclk; + u32 cdclk; + u8 divider; /* CD2X divider * 2 */ + u8 ratio; +}; + int intel_crtc_compute_min_cdclk(const 
struct intel_crtc_state *crtc_state); void intel_cdclk_init(struct drm_i915_private *i915); void intel_cdclk_uninit(struct drm_i915_private *i915); @@ -22,13 +29,8 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); -bool intel_cdclk_needs_cd2x_update(struct drm_i915_private *dev_priv, - const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); bool intel_cdclk_needs_modeset(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); -bool intel_cdclk_changed(const struct intel_cdclk_state *a, - const struct intel_cdclk_state *b); void intel_cdclk_swap_state(struct intel_atomic_state *state); void intel_set_cdclk_pre_plane_update(struct drm_i915_private *dev_priv, @@ -42,5 +44,6 @@ intel_set_cdclk_post_plane_update(struct drm_i915_private *dev_priv, enum pipe pipe); void intel_dump_cdclk_state(const struct intel_cdclk_state *cdclk_state, const char *context); +int intel_modeset_calc_cdclk(struct intel_atomic_state *state); #endif /* __INTEL_CDCLK_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 71a0201437a9..fa44eb73d088 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -43,6 +43,21 @@ #define LEGACY_LUT_LENGTH 256 /* + * ILK+ csc matrix: + * + * |R/Cr| | c0 c1 c2 | ( |R/Cr| |preoff0| ) |postoff0| + * |G/Y | = | c3 c4 c5 | x ( |G/Y | + |preoff1| ) + |postoff1| + * |B/Cb| | c6 c7 c8 | ( |B/Cb| |preoff2| ) |postoff2| + * + * ILK/SNB don't have explicit post offsets, and instead + * CSC_MODE_YUV_TO_RGB and CSC_BLACK_SCREEN_OFFSET are used: + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=0 -> 1/2, 0, 1/2 + * CSC_MODE_YUV_TO_RGB=0 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/2, 1/16, 1/2 + * CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=0 -> 0, 0, 0 + * CSC_MODE_YUV_TO_RGB=1 + CSC_BLACK_SCREEN_OFFSET=1 -> 1/16, 1/16, 1/16 + */ + +/* * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point * format). This macro takes the coefficient we want transformed and the * number of fractional bits. 
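/*
 * A minimal, self-contained sketch of the pipe CSC arithmetic the comment
 * above describes, i.e. out = M * (in + preoff) + postoff evaluated per
 * output channel. Illustration only, not driver code: the helper name is
 * made up, and the real hardware operates on fixed-point coefficients
 * (extracted from the U32.32 CTM values) rather than floats.
 */
static void csc_apply_sketch(const float m[9], const float preoff[3],
			     const float postoff[3],
			     const float in[3], float out[3])
{
	int i;

	for (i = 0; i < 3; i++) {
		/* matrix row applied to the pre-offset input vector */
		float v = m[3 * i + 0] * (in[0] + preoff[0]) +
			  m[3 * i + 1] * (in[1] + preoff[1]) +
			  m[3 * i + 2] * (in[2] + preoff[2]);

		/* the post offset is added after the matrix multiply */
		out[i] = v + postoff[i];
	}
}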
@@ -59,37 +74,38 @@ #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255) +/* Nop pre/post offsets */ static const u16 ilk_csc_off_zero[3] = {}; +/* Identity matrix */ static const u16 ilk_csc_coeff_identity[9] = { ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, 0, 0, 0, ILK_CSC_COEFF_1_0, }; +/* Limited range RGB post offsets */ static const u16 ilk_csc_postoff_limited_range[3] = { ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, ILK_CSC_POSTOFF_LIMITED_RANGE, }; +/* Full range RGB -> limited range RGB matrix */ static const u16 ilk_csc_coeff_limited_range[9] = { ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, 0, 0, 0, ILK_CSC_COEFF_LIMITED_RANGE, }; -/* - * These values are direct register values specified in the Bspec, - * for RGB->YUV conversion matrix (colorspace BT709) - */ +/* BT.709 full range RGB -> limited range YCbCr matrix */ static const u16 ilk_csc_coeff_rgb_to_ycbcr[9] = { 0x1e08, 0x9cc0, 0xb528, 0x2ba8, 0x09d8, 0x37e8, 0xbce8, 0x9ad8, 0x1e08, }; -/* Post offset values for RGB->YCBCR conversion */ +/* Limited range YCbCr post offsets */ static const u16 ilk_csc_postoff_rgb_to_ycbcr[3] = { 0x0800, 0x0100, 0x0800, }; @@ -611,12 +627,13 @@ static void bdw_load_lut_10(struct intel_crtc *crtc, static void ivb_load_lut_ext_max(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Program the max register to clamp values > 1.0. */ - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16); /* * Program the gc max 2 register to clamp values > 1.0. 
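/*
 * A minimal sketch of the DSB write pattern this patch switches the LUT
 * programming to, assuming the i915 display headers for the types used:
 * acquire a DSB for the crtc with intel_dsb_get(), queue writes with
 * intel_dsb_reg_write() / intel_dsb_indexed_reg_write() (the latter for
 * auto-incrementing data ports), then intel_dsb_commit() and
 * intel_dsb_put(). The helper below and its data_reg parameter are
 * hypothetical, for illustration only.
 */
static void sketch_load_lut_via_dsb(struct intel_crtc *crtc,
				    const struct drm_color_lut *lut,
				    int lut_size, i915_reg_t data_reg)
{
	struct intel_dsb *dsb = intel_dsb_get(crtc);
	int i;

	/* queue one indexed write per LUT entry (red channel as example) */
	for (i = 0; i < lut_size; i++)
		intel_dsb_indexed_reg_write(dsb, data_reg, lut[i].red);

	/* flush the batched writes to the hardware, then drop the ref */
	intel_dsb_commit(dsb);
	intel_dsb_put(dsb);
}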
@@ -624,10 +641,15 @@ static void ivb_load_lut_ext_max(struct intel_crtc *crtc) * from 3.0 to 7.0 */ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16); - I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 0), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 1), + 1 << 16); + intel_dsb_reg_write(dsb, PREC_PAL_EXT2_GC_MAX(pipe, 2), + 1 << 16); } + + intel_dsb_put(dsb); } static void ivb_load_luts(const struct intel_crtc_state *crtc_state) @@ -787,78 +809,83 @@ icl_load_gcmax(const struct intel_crtc_state *crtc_state, const struct drm_color_lut *color) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green); - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 0), color->red); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 1), color->green); + intel_dsb_reg_write(dsb, PREC_PAL_GC_MAX(pipe, 2), color->blue); + intel_dsb_put(dsb); } static void icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *blob = crtc_state->base.gamma_lut; const struct drm_color_lut *lut = blob->data; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; /* - * Every entry in the multi-segment LUT is corresponding to a superfine - * segment step which is 1/(8 * 128 * 256). + * Program Super Fine segment (let's call it seg1)... * - * Superfine segment has 9 entries, corresponding to values - * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). + * Super Fine segment's step is 1/(8 * 128 * 256) and it has + * 9 entries, corresponding to values 0, 1/(8 * 128 * 256), + * 2/(8 * 128 * 256) ... 8/(8 * 128 * 256). */ - I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(dsb, PREC_PAL_MULTI_SEG_INDEX(pipe), + PAL_PREC_AUTO_INCREMENT); for (i = 0; i < 9; i++) { const struct drm_color_lut *entry = &lut[i]; - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), - ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); } + + intel_dsb_put(dsb); } static void icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *blob = crtc_state->base.gamma_lut; const struct drm_color_lut *lut = blob->data; const struct drm_color_lut *entry; + struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; u32 i; /* - * * Program Fine segment (let's call it seg2)... * - * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) - * ... 256/(128*256). 
So in order to program fine segment of LUT we - * need to pick every 8'th entry in LUT, and program 256 indexes. + * Fine segment's step is 1/(128 * 256) i.e. 1/(128 * 256), 2/(128 * 256) + * ... 256/(128 * 256). So in order to program fine segment of LUT we + * need to pick every 8th entry in the LUT, and program 256 indexes. * * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], - * with seg2[0] being unused by the hardware. + * seg2[0] being unused by the hardware. */ - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + intel_dsb_reg_write(dsb, PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); for (i = 1; i < 257; i++) { entry = &lut[i * 8]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* * Program Coarse segment (let's call it seg3)... * - * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, - * 1/256, 2/256 ...256/256. As per the description of each entry in LUT + * Coarse segment starts from index 0 and it's step is 1/256 ie 0, + * 1/256, 2/256 ... 256/256. As per the description of each entry in LUT * above, we need to pick every (8 * 128)th entry in LUT, and * program 256 of those. * @@ -868,20 +895,24 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) */ for (i = 0; i < 256; i++) { entry = &lut[i * 8 * 128]; - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + intel_dsb_indexed_reg_write(dsb, PREC_PAL_DATA(pipe), + ilk_lut_12p4_udw(entry)); } /* The last entry in the LUT is to be programmed in GCMAX */ entry = &lut[256 * 8 * 128]; icl_load_gcmax(crtc_state, entry); ivb_load_lut_ext_max(crtc); + intel_dsb_put(dsb); } static void icl_load_luts(const struct intel_crtc_state *crtc_state) { const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct intel_dsb *dsb = intel_dsb_get(crtc); if (crtc_state->base.degamma_lut) glk_load_degamma_lut(crtc_state); @@ -890,16 +921,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) case GAMMA_MODE_MODE_8BIT: i9xx_load_luts(crtc_state); break; - case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: icl_program_gamma_superfine_segment(crtc_state); icl_program_gamma_multi_segment(crtc_state); break; - default: bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); } + + intel_dsb_commit(dsb); + intel_dsb_put(dsb); } static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color) @@ -1197,6 +1229,21 @@ static u32 ilk_gamma_mode(const struct intel_crtc_state *crtc_state) return GAMMA_MODE_MODE_10BIT; } +static u32 ilk_csc_mode(const struct intel_crtc_state *crtc_state) +{ + /* + * CSC comes after the LUT in RGB->YCbCr mode. + * RGB->YCbCr needs the limited range offsets added to + * the output. RGB limited range output is handled by + * the hw automagically elsewhere. 
+ */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return CSC_BLACK_SCREEN_OFFSET; + + return CSC_MODE_YUV_TO_RGB | + CSC_POSITION_BEFORE_GAMMA; +} + static int ilk_color_check(struct intel_crtc_state *crtc_state) { int ret; @@ -1210,15 +1257,15 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) !crtc_state->c8_planes; /* - * We don't expose the ctm on ilk/snb currently, - * nor do we enable YCbCr output. Also RGB limited - * range output is handled by the hw automagically. + * We don't expose the ctm on ilk/snb currently, also RGB + * limited range output is handled by the hw automagically. */ - crtc_state->csc_enable = false; + crtc_state->csc_enable = + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB; crtc_state->gamma_mode = ilk_gamma_mode(crtc_state); - crtc_state->csc_mode = 0; + crtc_state->csc_mode = ilk_csc_mode(crtc_state); ret = intel_color_add_affected_planes(crtc_state); if (ret) @@ -1371,6 +1418,403 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) return 0; } +static int i9xx_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 16; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int ilk_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +static int chv_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) + return 10; + else + return i9xx_gamma_precision(crtc_state); +} + +static int glk_gamma_precision(const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return 0; + + switch (crtc_state->gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + return 8; + case GAMMA_MODE_MODE_10BIT: + return 10; + default: + MISSING_CASE(crtc_state->gamma_mode); + return 0; + } +} + +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (HAS_GMCH(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) + return chv_gamma_precision(crtc_state); + else + return i9xx_gamma_precision(crtc_state); + } else { + if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + return glk_gamma_precision(crtc_state); + else if (IS_IRONLAKE(dev_priv)) + return ilk_gamma_precision(crtc_state); + } + + return 0; +} + +static bool err_check(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, u32 err) +{ + return ((abs((long)lut2->red - lut1->red)) <= err) && + ((abs((long)lut2->blue - lut1->blue)) <= err) && + ((abs((long)lut2->green - lut1->green)) <= err); +} + +static bool intel_color_lut_entry_equal(struct drm_color_lut *lut1, + struct drm_color_lut *lut2, + int lut_size, u32 err) +{ + int i; + + for (i = 0; i < lut_size; i++) { + if (!err_check(&lut1[i], &lut2[i], err)) + return false; + } + + return true; +} + +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision) +{ + struct drm_color_lut *lut1, *lut2; + int lut_size1, 
lut_size2; + u32 err; + + if (!blob1 != !blob2) + return false; + + if (!blob1) + return true; + + lut_size1 = drm_color_lut_size(blob1); + lut_size2 = drm_color_lut_size(blob2); + + /* check sw and hw lut size */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (lut_size1 != lut_size2) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + lut1 = blob1->data; + lut2 = blob2->data; + + err = 0xffff >> bit_precision; + + /* check sw and hw lut entry to be equal */ + switch (gamma_mode) { + case GAMMA_MODE_MODE_8BIT: + case GAMMA_MODE_MODE_10BIT: + if (!intel_color_lut_entry_equal(lut1, lut2, + lut_size2, err)) + return false; + break; + default: + MISSING_CASE(gamma_mode); + return false; + } + + return true; +} + +/* convert hw value with given bit_precision to lut property val */ +static u32 intel_color_lut_pack(u32 val, u32 bit_precision) +{ + u32 max = 0xffff >> (16 - bit_precision); + + val = clamp_val(val, 0, max); + + if (bit_precision < 16) + val <<= 16 - bit_precision; + + return val; +} + +static struct drm_property_blob * +i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < LEGACY_LUT_LENGTH; i++) { + if (HAS_GMCH(dev_priv)) + val = I915_READ(PALETTE(pipe, i)); + else + val = I915_READ(LGC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_RED_MASK, val), 8); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_GREEN_MASK, val), 8); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_BLUE_MASK, val), 8); + } + + return blob; +} + +static void i9xx_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); +} + +static struct drm_property_blob * +i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val1, val2; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size - 1; i++) { + val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); + val2 = I915_READ(PALETTE(pipe, 2 * i + 1)); + + blob_data[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_RED_MASK, val1); + blob_data[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_GREEN_MASK, val1); + blob_data[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | + REG_FIELD_GET(PALETTE_BLUE_MASK, val1); + } + + blob_data[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 0))); + blob_data[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + I915_READ(PIPEGCMAX(pipe, 1))); + blob_data[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + 
I915_READ(PIPEGCMAX(pipe, 2))); + + return blob; +} + +static void i965_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = i965_read_lut_10p6(crtc_state); +} + +static struct drm_property_blob * +chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_BLUE_MASK, val), 10); + + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1)); + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + CGM_PIPE_GAMMA_RED_MASK, val), 10); + } + + return blob; +} + +static void chv_read_luts(struct intel_crtc_state *crtc_state) +{ + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) + crtc_state->base.gamma_lut = chv_read_cgm_lut(crtc_state); + else + i965_read_luts(crtc_state); +} + +static struct drm_property_blob * +ilk_read_lut_10(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < lut_size; i++) { + val = I915_READ(PREC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PALETTE_BLUE_MASK, val), 10); + } + + return blob; +} + +static void ilk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if ((crtc_state->csc_mode & CSC_POSITION_BEFORE_GAMMA) == 0) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = ilk_read_lut_10(crtc_state); +} + +static struct drm_property_blob * +glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int hw_lut_size = ivb_lut_10_size(prec_index); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * hw_lut_size, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index 
| + PAL_PREC_AUTO_INCREMENT); + + for (i = 0; i < hw_lut_size; i++) { + val = I915_READ(PREC_PAL_DATA(pipe)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_RED_MASK, val), 10); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_GREEN_MASK, val), 10); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + PREC_PAL_DATA_BLUE_MASK, val), 10); + } + + I915_WRITE(PREC_PAL_INDEX(pipe), 0); + + return blob; +} + +static void glk_read_luts(struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->gamma_enable) + return; + + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) + crtc_state->base.gamma_lut = i9xx_read_lut_8(crtc_state); + else + crtc_state->base.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); +} + void intel_color_init(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1383,14 +1827,17 @@ void intel_color_init(struct intel_crtc *crtc) dev_priv->display.color_check = chv_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = chv_load_luts; + dev_priv->display.read_luts = chv_read_luts; } else if (INTEL_GEN(dev_priv) >= 4) { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i965_load_luts; + dev_priv->display.read_luts = i965_read_luts; } else { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; dev_priv->display.load_luts = i9xx_load_luts; + dev_priv->display.read_luts = i9xx_read_luts; } } else { if (INTEL_GEN(dev_priv) >= 11) @@ -1409,16 +1856,19 @@ void intel_color_init(struct intel_crtc *crtc) else dev_priv->display.color_commit = ilk_color_commit; - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 11) { dev_priv->display.load_luts = icl_load_luts; - else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) + } else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv)) { dev_priv->display.load_luts = glk_load_luts; - else if (INTEL_GEN(dev_priv) >= 8) + dev_priv->display.read_luts = glk_read_luts; + } else if (INTEL_GEN(dev_priv) >= 8) { dev_priv->display.load_luts = bdw_load_luts; - else if (INTEL_GEN(dev_priv) >= 7) + } else if (INTEL_GEN(dev_priv) >= 7) { dev_priv->display.load_luts = ivb_load_luts; - else + } else { dev_priv->display.load_luts = ilk_load_luts; + dev_priv->display.read_luts = ilk_read_luts; + } } drm_crtc_enable_color_mgmt(&crtc->base, diff --git a/drivers/gpu/drm/i915/display/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h index 057e8ac63555..173727aaa24d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.h +++ b/drivers/gpu/drm/i915/display/intel_color.h @@ -6,13 +6,20 @@ #ifndef __INTEL_COLOR_H__ #define __INTEL_COLOR_H__ +#include <linux/types.h> + struct intel_crtc_state; struct intel_crtc; +struct drm_property_blob; void intel_color_init(struct intel_crtc *crtc); int intel_color_check(struct intel_crtc_state *crtc_state); void intel_color_commit(const struct intel_crtc_state *crtc_state); void intel_color_load_luts(const struct intel_crtc_state *crtc_state); void intel_color_get_config(struct intel_crtc_state *crtc_state); +int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state); +bool intel_color_lut_equal(struct drm_property_blob *blob1, + struct drm_property_blob *blob2, + u32 gamma_mode, u32 bit_precision); #endif /* __INTEL_COLOR_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_connector.c 
b/drivers/gpu/drm/i915/display/intel_connector.c index ba2ef165a01a..1133c4e97bb4 100644 --- a/drivers/gpu/drm/i915/display/intel_connector.c +++ b/drivers/gpu/drm/i915/display/intel_connector.c @@ -277,7 +277,22 @@ intel_attach_aspect_ratio_property(struct drm_connector *connector) void intel_attach_colorspace_property(struct drm_connector *connector) { - if (!drm_mode_create_hdmi_colorspace_property(connector)) - drm_object_attach_property(&connector->base, - connector->colorspace_property, 0); + switch (connector->connector_type) { + case DRM_MODE_CONNECTOR_HDMIA: + case DRM_MODE_CONNECTOR_HDMIB: + if (drm_mode_create_hdmi_colorspace_property(connector)) + return; + break; + case DRM_MODE_CONNECTOR_DisplayPort: + case DRM_MODE_CONNECTOR_eDP: + if (drm_mode_create_dp_colorspace_property(connector)) + return; + break; + default: + DRM_DEBUG_KMS("Colorspace property not supported\n"); + return; + } + + drm_object_attach_property(&connector->base, + connector->colorspace_property, 0); } diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c index e6e8d4a82044..ff6126ea793c 100644 --- a/drivers/gpu/drm/i915/display/intel_crt.c +++ b/drivers/gpu/drm/i915/display/intel_crt.c @@ -994,9 +994,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->base.type = INTEL_OUTPUT_ANALOG; crt->base.cloneable = (1 << INTEL_OUTPUT_DVO) | (1 << INTEL_OUTPUT_HDMI); if (IS_I830(dev_priv)) - crt->base.crtc_mask = (1 << 0); + crt->base.crtc_mask = BIT(PIPE_A); else - crt->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + crt->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); if (IS_GEN(dev_priv, 2)) connector->interlace_allowed = 0; diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 8eb2b3ec01ed..9ba794cb9b4f 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -45,6 +45,7 @@ #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sprite.h" #include "intel_tc.h" #include "intel_vdsc.h" @@ -586,6 +587,26 @@ static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = { { 0x0, 0x00, 0x00 }, /* 3 0 */ }; +struct tgl_dkl_phy_ddi_buf_trans { + u32 dkl_vswing_control; + u32 dkl_preshoot_control; + u32 dkl_de_emphasis_control; +}; + +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { + /* VS pre-emp Non-trans mV Pre-emph dB */ + { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ + { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ + { 0x2, 0x0, 0x0b }, /* 0 2 400mV 6 dB */ + { 0x0, 0x0, 0x19 }, /* 0 3 400mV 9.5 dB */ + { 0x5, 0x0, 0x00 }, /* 1 0 600mV 0 dB */ + { 0x2, 0x0, 0x03 }, /* 1 1 600mV 3.5 dB */ + { 0x0, 0x0, 0x14 }, /* 1 2 600mV 6 dB */ + { 0x2, 0x0, 0x00 }, /* 2 0 800mV 0 dB */ + { 0x0, 0x0, 0x0B }, /* 2 1 800mV 3.5 dB */ + { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -872,7 +893,14 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, + 0, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + default_entry = n_entries - 1; + } else if (INTEL_GEN(dev_priv) == 11) { if 
(intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); @@ -1049,6 +1077,8 @@ static u32 icl_pll_to_ddi_clk_sel(struct intel_encoder *encoder, case DPLL_ID_ICL_MGPLL2: case DPLL_ID_ICL_MGPLL3: case DPLL_ID_ICL_MGPLL4: + case DPLL_ID_TGL_MGPLL5: + case DPLL_ID_TGL_MGPLL6: return DDI_CLK_SEL_MG; } } @@ -1413,11 +1443,30 @@ static int icl_calc_mg_pll_link(struct drm_i915_private *dev_priv, ref_clock = dev_priv->cdclk.hw.ref; - m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; - m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; - m2_frac = (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) ? - (pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_FRAC_MASK) >> - MG_PLL_DIV0_FBDIV_FRAC_SHIFT : 0; + if (INTEL_GEN(dev_priv) >= 12) { + m1 = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBPREDIV_MASK; + m1 = m1 >> DKL_PLL_DIV0_FBPREDIV_SHIFT; + m2_int = pll_state->mg_pll_div0 & DKL_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_bias & DKL_PLL_BIAS_FRAC_EN_H) { + m2_frac = pll_state->mg_pll_bias & + DKL_PLL_BIAS_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> DKL_PLL_BIAS_FBDIV_SHIFT; + } else { + m2_frac = 0; + } + } else { + m1 = pll_state->mg_pll_div1 & MG_PLL_DIV1_FBPREDIV_MASK; + m2_int = pll_state->mg_pll_div0 & MG_PLL_DIV0_FBDIV_INT_MASK; + + if (pll_state->mg_pll_div0 & MG_PLL_DIV0_FRACNEN_H) { + m2_frac = pll_state->mg_pll_div0 & + MG_PLL_DIV0_FBDIV_FRAC_MASK; + m2_frac = m2_frac >> MG_PLL_DIV0_FBDIV_FRAC_SHIFT; + } else { + m2_frac = 0; + } + } switch (pll_state->mg_clktop2_hsclkctl & MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK) { @@ -1692,7 +1741,8 @@ static void intel_ddi_clock_get(struct intel_encoder *encoder, hsw_ddi_clock_get(encoder, pipe_config); } -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1704,44 +1754,52 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) WARN_ON(transcoder_is_dsi(cpu_transcoder)); - temp = TRANS_MSA_SYNC_CLK; - - if (crtc_state->limited_color_range) - temp |= TRANS_MSA_CEA_RANGE; + temp = DP_MSA_MISC_SYNC_CLOCK; switch (crtc_state->pipe_bpp) { case 18: - temp |= TRANS_MSA_6_BPC; + temp |= DP_MSA_MISC_6_BPC; break; case 24: - temp |= TRANS_MSA_8_BPC; + temp |= DP_MSA_MISC_8_BPC; break; case 30: - temp |= TRANS_MSA_10_BPC; + temp |= DP_MSA_MISC_10_BPC; break; case 36: - temp |= TRANS_MSA_12_BPC; + temp |= DP_MSA_MISC_12_BPC; break; default: MISSING_CASE(crtc_state->pipe_bpp); break; } + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + + if (crtc_state->limited_color_range) + temp |= DP_MSA_MISC_COLOR_CEA_RGB; + /* * As per DP 1.2 spec section 2.3.4.3 while sending * YCBCR 444 signals we should program MSA MISC1/0 fields with - * colorspace information. The output colorspace encoding is BT601. + * colorspace information. */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) - temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR; + temp |= DP_MSA_MISC_COLOR_YCBCR_444_BT709; + /* * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication * of Color Encoding Format and Content Color Gamut] while sending - * YCBCR 420 signals we should program MSA MISC1 fields which - * indicate VSC SDP for the Pixel Encoding/Colorimetry Format. 
+ * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields + * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. + * + * FIXME MST doesn't pass in the conn_state */ - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - temp |= TRANS_MSA_USE_VSC_SDP; + if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + temp |= DP_MSA_MISC_COLOR_VSC_SDP; + I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } @@ -1761,7 +1819,14 @@ void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +/* + * Returns the TRANS_DDI_FUNC_CTL value based on CRTC state. + * + * Only intended to be used by intel_ddi_enable_transcoder_func() and + * intel_ddi_config_transcoder_func(). + */ +static u32 +intel_ddi_transcoder_func_reg_val_get(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_encoder *encoder = intel_ddi_get_crtc_encoder(crtc); @@ -1845,6 +1910,34 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } + return temp; +} + +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); +} + +/* + * Same as intel_ddi_enable_transcoder_func(), but it does not set the enable + * bit. + */ +static void +intel_ddi_config_transcoder_func(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + u32 temp; + + temp = intel_ddi_transcoder_func_reg_val_get(crtc_state); + temp &= ~TRANS_DDI_FUNC_ENABLE; I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } @@ -2045,18 +2138,20 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, } if (!*pipe_mask) - DRM_DEBUG_KMS("No pipe for ddi port %c found\n", - port_name(port)); + DRM_DEBUG_KMS("No pipe for [ENCODER:%d:%s] found\n", + encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && hweight8(*pipe_mask) > 1) { - DRM_DEBUG_KMS("Multiple pipes for non DP-MST port %c (pipe_mask %02x)\n", - port_name(port), *pipe_mask); + DRM_DEBUG_KMS("Multiple pipes for [ENCODER:%d:%s] (pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask); *pipe_mask = BIT(ffs(*pipe_mask) - 1); } if (mst_pipe_mask && mst_pipe_mask != *pipe_mask) - DRM_DEBUG_KMS("Conflicting MST and non-MST encoders for port %c (pipe_mask %02x mst_pipe_mask %02x)\n", - port_name(port), *pipe_mask, mst_pipe_mask); + DRM_DEBUG_KMS("Conflicting MST and non-MST state for [ENCODER:%d:%s] (pipe_mask %02x mst_pipe_mask %02x)\n", + encoder->base.base.id, encoder->base.name, + *pipe_mask, mst_pipe_mask); else *is_dp_mst = mst_pipe_mask; @@ -2066,8 +2161,9 @@ out: if ((tmp & (BXT_PHY_CMNLANE_POWERDOWN_ACK | BXT_PHY_LANE_POWERDOWN_ACK | BXT_PHY_LANE_ENABLED)) != BXT_PHY_LANE_ENABLED) - DRM_ERROR("Port %c enabled but PHY powered down? 
" - "(PHY_CTL %08x)\n", port_name(port), tmp); + DRM_ERROR("[ENCODER:%d:%s] enabled but PHY powered down? " + "(PHY_CTL %08x)\n", encoder->base.base.id, + encoder->base.name, tmp); } intel_display_power_put(dev_priv, encoder->power_domain, wakeref); @@ -2269,7 +2365,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum phy phy = intel_port_to_phy(dev_priv, port); int n_entries; - if (INTEL_GEN(dev_priv) >= 11) { + if (INTEL_GEN(dev_priv) >= 12) { + if (intel_phy_is_combo(dev_priv, phy)) + icl_get_combo_buf_trans(dev_priv, encoder->type, + intel_dp->link_rate, &n_entries); + else + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); @@ -2583,7 +2685,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); const struct icl_mg_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val; int ln; @@ -2599,33 +2701,33 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, /* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX1_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX1_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX1_LINK_PARAMS(ln, tc_port), val); - val = I915_READ(MG_TX2_LINK_PARAMS(ln, port)); + val = I915_READ(MG_TX2_LINK_PARAMS(ln, tc_port)); val &= ~CRI_USE_FS32; - I915_WRITE(MG_TX2_LINK_PARAMS(ln, port), val); + I915_WRITE(MG_TX2_LINK_PARAMS(ln, tc_port), val); } /* Program MG_TX_SWINGCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX1_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX1_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX1_SWINGCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_SWINGCTRL(ln, port)); + val = I915_READ(MG_TX2_SWINGCTRL(ln, tc_port)); val &= ~CRI_TXDEEMPH_OVERRIDE_17_12_MASK; val |= CRI_TXDEEMPH_OVERRIDE_17_12( ddi_translations[level].cri_txdeemph_override_17_12); - I915_WRITE(MG_TX2_SWINGCTRL(ln, port), val); + I915_WRITE(MG_TX2_SWINGCTRL(ln, tc_port), val); } /* Program MG_TX_DRVCTRL with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DRVCTRL(ln, port)); + val = I915_READ(MG_TX1_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2633,9 +2735,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - I915_WRITE(MG_TX1_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX1_DRVCTRL(ln, tc_port), val); - val = I915_READ(MG_TX2_DRVCTRL(ln, port)); + val = I915_READ(MG_TX2_DRVCTRL(ln, tc_port)); val &= ~(CRI_TXDEEMPH_OVERRIDE_11_6_MASK | CRI_TXDEEMPH_OVERRIDE_5_0_MASK); val |= CRI_TXDEEMPH_OVERRIDE_5_0( @@ -2643,7 +2745,7 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, CRI_TXDEEMPH_OVERRIDE_11_6( ddi_translations[level].cri_txdeemph_override_11_6) | CRI_TXDEEMPH_OVERRIDE_EN; - 
I915_WRITE(MG_TX2_DRVCTRL(ln, port), val); + I915_WRITE(MG_TX2_DRVCTRL(ln, tc_port), val); /* FIXME: Program CRI_LOADGEN_SEL after the spec is updated */ } @@ -2654,17 +2756,17 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, * values from table for which TX1 and TX2 enabled. */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_CLKHUB(ln, port)); + val = I915_READ(MG_CLKHUB(ln, tc_port)); if (link_clock < 300000) val |= CFG_LOW_RATE_LKREN_EN; else val &= ~CFG_LOW_RATE_LKREN_EN; - I915_WRITE(MG_CLKHUB(ln, port), val); + I915_WRITE(MG_CLKHUB(ln, tc_port), val); } /* Program the MG_TX_DCC<LN, port being used> based on the link frequency */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_DCC(ln, port)); + val = I915_READ(MG_TX1_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2672,9 +2774,9 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX1_DCC(ln, port), val); + I915_WRITE(MG_TX1_DCC(ln, tc_port), val); - val = I915_READ(MG_TX2_DCC(ln, port)); + val = I915_READ(MG_TX2_DCC(ln, tc_port)); val &= ~CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK; if (link_clock <= 500000) { val &= ~CFG_AMI_CK_DIV_OVERRIDE_EN; @@ -2682,18 +2784,18 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, val |= CFG_AMI_CK_DIV_OVERRIDE_EN | CFG_AMI_CK_DIV_OVERRIDE_VAL(1); } - I915_WRITE(MG_TX2_DCC(ln, port), val); + I915_WRITE(MG_TX2_DCC(ln, tc_port), val); } /* Program MG_TX_PISO_READLOAD with values from vswing table */ for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_TX1_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX1_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX1_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX1_PISO_READLOAD(ln, tc_port), val); - val = I915_READ(MG_TX2_PISO_READLOAD(ln, port)); + val = I915_READ(MG_TX2_PISO_READLOAD(ln, tc_port)); val |= CRI_CALCINIT; - I915_WRITE(MG_TX2_PISO_READLOAD(ln, port), val); + I915_WRITE(MG_TX2_PISO_READLOAD(ln, tc_port), val); } } @@ -2711,6 +2813,62 @@ static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, icl_mg_phy_ddi_vswing_sequence(encoder, link_clock, level); } +static void +tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, + u32 level) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum tc_port tc_port = intel_port_to_tc(dev_priv, encoder->port); + const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; + u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; + + n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + ddi_translations = tgl_dkl_phy_ddi_translations; + + if (level >= n_entries) + level = n_entries - 1; + + dpcnt_mask = (DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK); + dpcnt_val = DKL_TX_VSWING_CONTROL(ddi_translations[level].dkl_vswing_control); + dpcnt_val |= DKL_TX_DE_EMPHASIS_COEFF(ddi_translations[level].dkl_de_emphasis_control); + dpcnt_val |= DKL_TX_PRESHOOT_COEFF(ddi_translations[level].dkl_preshoot_control); + + for (ln = 0; ln < 2; ln++) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + + /* All the registers are RMW */ + val = I915_READ(DKL_TX_DPCNTL0(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + I915_WRITE(DKL_TX_DPCNTL0(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL1(tc_port)); + val &= ~dpcnt_mask; + val |= dpcnt_val; + 
I915_WRITE(DKL_TX_DPCNTL1(tc_port), val); + + val = I915_READ(DKL_TX_DPCNTL2(tc_port)); + val &= ~DKL_TX_DP20BITMODE; + I915_WRITE(DKL_TX_DPCNTL2(tc_port), val); + } +} + +static void tgl_ddi_vswing_sequence(struct intel_encoder *encoder, + int link_clock, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (intel_phy_is_combo(dev_priv, phy)) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + tgl_dkl_phy_ddi_vswing_sequence(encoder, link_clock, level); +} + static u32 translate_signal_level(int signal_levels) { int i; @@ -2742,7 +2900,10 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, intel_dp->link_rate, + level, encoder->type); + else if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, intel_dp->link_rate, level, encoder->type); else if (IS_CANNONLAKE(dev_priv)) @@ -2989,130 +3150,141 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) } } -static void icl_enable_phy_clock_gating(struct intel_digital_port *dig_port) +static void +icl_phy_set_clock_gating(struct intel_digital_port *dig_port, bool enable) { struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; + enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); + u32 val, bits; int ln; if (tc_port == PORT_TC_NONE) return; - for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); - val |= MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING; - I915_WRITE(MG_DP_MODE(ln, port), val); - } - - val = I915_READ(MG_MISC_SUS0(tc_port)); - val |= MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3) | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING; - I915_WRITE(MG_MISC_SUS0(tc_port), val); -} + bits = MG_DP_MODE_CFG_TR2PWR_GATING | MG_DP_MODE_CFG_TRPWR_GATING | + MG_DP_MODE_CFG_CLNPWR_GATING | MG_DP_MODE_CFG_DIGPWR_GATING | + MG_DP_MODE_CFG_GAONPWR_GATING; -static void icl_disable_phy_clock_gating(struct intel_digital_port *dig_port) -{ - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - enum port port = dig_port->base.port; - enum tc_port tc_port = intel_port_to_tc(dev_priv, port); - u32 val; - int ln; + for (ln = 0; ln < 2; ln++) { + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); + val = I915_READ(DKL_DP_MODE(tc_port)); + } else { + val = I915_READ(MG_DP_MODE(ln, tc_port)); + } - if (tc_port == PORT_TC_NONE) - return; + if (enable) + val |= bits; + else + val &= ~bits; - for (ln = 0; ln < 2; ln++) { - val = I915_READ(MG_DP_MODE(ln, port)); - val &= ~(MG_DP_MODE_CFG_TR2PWR_GATING | - MG_DP_MODE_CFG_TRPWR_GATING | - MG_DP_MODE_CFG_CLNPWR_GATING | - MG_DP_MODE_CFG_DIGPWR_GATING | - MG_DP_MODE_CFG_GAONPWR_GATING); - I915_WRITE(MG_DP_MODE(ln, port), val); + if (INTEL_GEN(dev_priv) >= 12) + I915_WRITE(DKL_DP_MODE(tc_port), val); + else + I915_WRITE(MG_DP_MODE(ln, tc_port), val); } - val = 
I915_READ(MG_MISC_SUS0(tc_port)); - val &= ~(MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK | - MG_MISC_SUS0_CFG_TR2PWR_GATING | - MG_MISC_SUS0_CFG_CL2PWR_GATING | - MG_MISC_SUS0_CFG_GAONPWR_GATING | - MG_MISC_SUS0_CFG_TRPWR_GATING | - MG_MISC_SUS0_CFG_CL1PWR_GATING | - MG_MISC_SUS0_CFG_DGPWR_GATING); - I915_WRITE(MG_MISC_SUS0(tc_port), val); + if (INTEL_GEN(dev_priv) == 11) { + bits = MG_MISC_SUS0_CFG_TR2PWR_GATING | + MG_MISC_SUS0_CFG_CL2PWR_GATING | + MG_MISC_SUS0_CFG_GAONPWR_GATING | + MG_MISC_SUS0_CFG_TRPWR_GATING | + MG_MISC_SUS0_CFG_CL1PWR_GATING | + MG_MISC_SUS0_CFG_DGPWR_GATING; + + val = I915_READ(MG_MISC_SUS0(tc_port)); + if (enable) + val |= (bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE(3)); + else + val &= ~(bits | MG_MISC_SUS0_SUSCLK_DYNCLKGATE_MODE_MASK); + I915_WRITE(MG_MISC_SUS0(tc_port), val); + } } -static void icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port) +static void +icl_program_mg_dp_mode(struct intel_digital_port *intel_dig_port, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); - enum port port = intel_dig_port->base.port; - u32 ln0, ln1, lane_mask; + enum tc_port tc_port = intel_port_to_tc(dev_priv, intel_dig_port->base.port); + u32 ln0, ln1, pin_assignment; + u8 width; if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT) return; - ln0 = I915_READ(MG_DP_MODE(0, port)); - ln1 = I915_READ(MG_DP_MODE(1, port)); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + ln0 = I915_READ(DKL_DP_MODE(tc_port)); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + ln1 = I915_READ(DKL_DP_MODE(tc_port)); + } else { + ln0 = I915_READ(MG_DP_MODE(0, tc_port)); + ln1 = I915_READ(MG_DP_MODE(1, tc_port)); + } - switch (intel_dig_port->tc_mode) { - case TC_PORT_DP_ALT: - ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); + ln0 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X1_MODE); + ln1 &= ~(MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE); - lane_mask = intel_tc_port_get_lane_mask(intel_dig_port); + /* DPPATC */ + pin_assignment = intel_tc_port_get_pin_assignment_mask(intel_dig_port); + width = crtc_state->lane_count; - switch (lane_mask) { - case 0x1: - case 0x4: - break; - case 0x2: + switch (pin_assignment) { + case 0x0: + WARN_ON(intel_dig_port->tc_mode != TC_PORT_LEGACY); + if (width == 1) { + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x1: + if (width == 4) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x2: + if (width == 2) { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } + break; + case 0x3: + case 0x5: + if (width == 1) { ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0x3: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0x8: ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; - break; - case 0xC: - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - case 0xF: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= MG_DP_MODE_CFG_DP_X1_MODE | - MG_DP_MODE_CFG_DP_X2_MODE; - break; - default: - MISSING_CASE(lane_mask); + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; } break; - - case TC_PORT_LEGACY: - ln0 |= MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; - ln1 |= 
MG_DP_MODE_CFG_DP_X1_MODE | MG_DP_MODE_CFG_DP_X2_MODE; + case 0x4: + case 0x6: + if (width == 1) { + ln0 |= MG_DP_MODE_CFG_DP_X1_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X1_MODE; + } else { + ln0 |= MG_DP_MODE_CFG_DP_X2_MODE; + ln1 |= MG_DP_MODE_CFG_DP_X2_MODE; + } break; - default: - MISSING_CASE(intel_dig_port->tc_mode); - return; + MISSING_CASE(pin_assignment); } - I915_WRITE(MG_DP_MODE(0, port), ln0); - I915_WRITE(MG_DP_MODE(1, port), ln1); + if (INTEL_GEN(dev_priv) >= 12) { + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x0)); + I915_WRITE(DKL_DP_MODE(tc_port), ln0); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x1)); + I915_WRITE(DKL_DP_MODE(tc_port), ln1); + } else { + I915_WRITE(MG_DP_MODE(0, tc_port), ln0); + I915_WRITE(MG_DP_MODE(1, tc_port), ln1); + } } static void intel_dp_sink_set_fec_ready(struct intel_dp *intel_dp, @@ -3129,17 +3301,18 @@ static void intel_ddi_enable_fec(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(&encoder->base); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val |= DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_FEC_ENABLE_LIVE, 1)) DRM_ERROR("Timed out waiting for FEC Enable Status\n"); } @@ -3148,21 +3321,205 @@ static void intel_ddi_disable_fec_state(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = encoder->port; + struct intel_dp *intel_dp; u32 val; if (!crtc_state->fec_enable) return; - val = I915_READ(DP_TP_CTL(port)); + intel_dp = enc_to_intel_dp(&encoder->base); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_FEC_ENABLE; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); } -static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) +static void +tgl_clear_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 val; + + if (!cstate->dc3co_exitline) + return; + + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void +tgl_set_psr2_transcoder_exitline(const struct intel_crtc_state *cstate) +{ + u32 val, exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + + if (!cstate->dc3co_exitline) + return; + + exit_scanlines = cstate->dc3co_exitline; + exit_scanlines <<= EXITLINE_SHIFT; + val = I915_READ(EXITLINE(cstate->cpu_transcoder)); + val &= ~(EXITLINE_MASK | EXITLINE_ENABLE); + val |= exit_scanlines; + val |= EXITLINE_ENABLE; + I915_WRITE(EXITLINE(cstate->cpu_transcoder), val); +} + +static void tgl_dc3co_exitline_compute_config(struct intel_encoder *encoder, + struct intel_crtc_state *cstate) +{ + u32 exit_scanlines; + struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); + u32 crtc_vdisplay = 
cstate->base.adjusted_mode.crtc_vdisplay; + + cstate->dc3co_exitline = 0; + + if (!(dev_priv->csr.allowed_dc_mask & DC_STATE_EN_DC3CO)) + return; + + /* B.Specs:49196 DC3CO only works with pipeA and DDIA.*/ + if (to_intel_crtc(cstate->base.crtc)->pipe != PIPE_A || + encoder->port != PORT_A) + return; + + if (!cstate->has_psr2 || !cstate->base.active) + return; + + /* + * DC3CO Exit time 200us B.Spec 49196 + * PSR2 transcoder Early Exit scanlines = ROUNDUP(200 / line time) + 1 + */ + exit_scanlines = + intel_usecs_to_scanlines(&cstate->base.adjusted_mode, 200) + 1; + + if (WARN_ON(exit_scanlines > crtc_vdisplay)) + return; + + cstate->dc3co_exitline = crtc_vdisplay - exit_scanlines; + DRM_DEBUG_KMS("DC3CO exit scanlines %d\n", cstate->dc3co_exitline); +} + +static void tgl_dc3co_exitline_get_config(struct intel_crtc_state *crtc_state) +{ + u32 val; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (INTEL_GEN(dev_priv) < 12) + return; + + val = I915_READ(EXITLINE(crtc_state->cpu_transcoder)); + + if (val & EXITLINE_ENABLE) + crtc_state->dc3co_exitline = val & EXITLINE_MASK; +} + +static void tgl_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); + int level = intel_ddi_dp_level(intel_dp); + enum transcoder transcoder = crtc_state->cpu_transcoder; + + tgl_set_psr2_transcoder_exitline(crtc_state); + intel_dp_set_link_params(intel_dp, crtc_state->port_clock, + crtc_state->lane_count, is_mst); + + intel_dp->regs.dp_tp_ctl = TGL_DP_TP_CTL(transcoder); + intel_dp->regs.dp_tp_status = TGL_DP_TP_STATUS(transcoder); + + /* 1.a got on intel_atomic_commit_tail() */ + + /* 2. */ + intel_edp_panel_on(intel_dp); + + /* + * 1.b, 3. and 4.a is done before tgl_ddi_pre_enable_dp() by: + * haswell_crtc_enable()->intel_encoders_pre_pll_enable() and + * haswell_crtc_enable()->intel_enable_shared_dpll() + */ + + /* 4.b */ + intel_ddi_clk_select(encoder, crtc_state); + + /* 5. */ + if (!intel_phy_is_tc(dev_priv, phy) || + dig_port->tc_mode != TC_PORT_TBT_ALT) + intel_display_power_get(dev_priv, + dig_port->ddi_io_power_domain); + + /* 6. 
*/ + icl_program_mg_dp_mode(dig_port, crtc_state); + + /* + * 7.a - Steps in this function should only be executed over MST + * master, what will be taken in care by MST hook + * intel_mst_pre_enable_dp() + */ + intel_ddi_enable_pipe_clock(crtc_state); + + /* 7.b */ + intel_ddi_config_transcoder_func(crtc_state); + + /* 7.d */ + icl_phy_set_clock_gating(dig_port, false); + + /* 7.e */ + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, + encoder->type); + + /* 7.f */ + if (intel_phy_is_combo(dev_priv, phy)) { + bool lane_reversal = + dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + + intel_combo_phy_power_up_lanes(dev_priv, phy, false, + crtc_state->lane_count, + lane_reversal); + } + + /* 7.g */ + intel_ddi_init_dp_buf_reg(encoder); + + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + + intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); + /* + * DDI FEC: "anticipates enabling FEC encoding sets the FEC_READY bit + * in the FEC_CONFIGURATION register to 1 before initiating link + * training + */ + intel_dp_sink_set_fec_ready(intel_dp, crtc_state); + /* 7.c, 7.h, 7.i, 7.j */ + intel_dp_start_link_train(intel_dp); + + /* 7.k */ + if (!is_trans_port_sync_mode(crtc_state)) + intel_dp_stop_link_train(intel_dp); + + /* + * TODO: enable clock gating + * + * It is not written in DP enabling sequence but "PHY Clockgating + * programming" states that clock gating should be enabled after the + * link training but doing so causes all the following trainings to fail + * so not enabling it for now. + */ + + /* 7.l */ + intel_ddi_enable_fec(encoder, crtc_state); + intel_dsc_enable(encoder, crtc_state); +} + +static void hsw_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -3177,6 +3534,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_dp_set_link_params(intel_dp, crtc_state->port_clock, crtc_state->lane_count, is_mst); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, crtc_state); @@ -3186,8 +3546,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); + icl_phy_set_clock_gating(dig_port, false); if (INTEL_GEN(dev_priv) >= 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, @@ -3215,12 +3575,13 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); intel_dp_start_link_train(intel_dp); - if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) + if ((port != PORT_A || INTEL_GEN(dev_priv) >= 9) && + !is_trans_port_sync_mode(crtc_state)) intel_dp_stop_link_train(intel_dp); intel_ddi_enable_fec(encoder, crtc_state); - icl_enable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, true); if (!is_mst) intel_ddi_enable_pipe_clock(crtc_state); @@ -3228,6 +3589,20 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_dsc_enable(encoder, crtc_state); } +static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state 
*conn_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + else + hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); + + intel_ddi_set_dp_msa(crtc_state, conn_state); +} + static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) @@ -3244,10 +3619,13 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - icl_program_mg_dp_mode(dig_port); - icl_disable_phy_clock_gating(dig_port); + icl_program_mg_dp_mode(dig_port, crtc_state); + icl_phy_set_clock_gating(dig_port, false); - if (INTEL_GEN(dev_priv) >= 11) + if (INTEL_GEN(dev_priv) >= 12) + tgl_ddi_vswing_sequence(encoder, crtc_state->port_clock, + level, INTEL_OUTPUT_HDMI); + else if (INTEL_GEN(dev_priv) == 11) icl_ddi_vswing_sequence(encoder, crtc_state->port_clock, level, INTEL_OUTPUT_HDMI); else if (IS_CANNONLAKE(dev_priv)) @@ -3257,7 +3635,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, else intel_prepare_hdmi_ddi_buffers(encoder, level); - icl_enable_phy_clock_gating(dig_port); + icl_phy_set_clock_gating(dig_port, true); if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level, INTEL_OUTPUT_HDMI); @@ -3330,10 +3708,14 @@ static void intel_disable_ddi_buf(struct intel_encoder *encoder, wait = true; } - val = I915_READ(DP_TP_CTL(port)); - val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); - val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); + if (intel_crtc_has_dp_encoder(crtc_state)) { + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + val = I915_READ(intel_dp->regs.dp_tp_ctl); + val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); + val |= DP_TP_CTL_LINK_TRAIN_PAT1; + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + } /* Disable FEC in DP Sink */ intel_ddi_disable_fec_state(encoder, crtc_state); @@ -3373,6 +3755,7 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, dig_port->ddi_io_power_domain); intel_ddi_clk_disable(encoder); + tgl_clear_psr2_transcoder_exitline(old_crtc_state); } static void intel_ddi_post_disable_hdmi(struct intel_encoder *encoder, @@ -3475,7 +3858,8 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); - intel_dp_ycbcr_420_enable(intel_dp, crtc_state); + intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); + intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) @@ -3633,7 +4017,7 @@ static void intel_ddi_update_pipe_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_ddi_set_pipe_settings(crtc_state); + intel_ddi_set_dp_msa(crtc_state, conn_state); intel_psr_update(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); @@ -3761,7 +4145,7 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) u32 val; bool wait = false; - if (I915_READ(DP_TP_CTL(port)) & DP_TP_CTL_ENABLE) { + if (I915_READ(intel_dp->regs.dp_tp_ctl) & DP_TP_CTL_ENABLE) { val = I915_READ(DDI_BUF_CTL(port)); if (val & DDI_BUF_CTL_ENABLE) { val &= ~DDI_BUF_CTL_ENABLE; @@ -3769,11 +4153,11 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) wait = true; } - val = 
I915_READ(DP_TP_CTL(port)); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~(DP_TP_CTL_ENABLE | DP_TP_CTL_LINK_TRAIN_MASK); val |= DP_TP_CTL_LINK_TRAIN_PAT1; - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); if (wait) intel_wait_ddi_buf_idle(dev_priv, port); @@ -3788,8 +4172,8 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) val |= DP_TP_CTL_ENHANCED_FRAME_ENABLE; } - I915_WRITE(DP_TP_CTL(port), val); - POSTING_READ(DP_TP_CTL(port)); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); + POSTING_READ(intel_dp->regs.dp_tp_ctl); intel_dp->DP |= DDI_BUF_CTL_ENABLE; I915_WRITE(DDI_BUF_CTL(port), intel_dp->DP); @@ -3891,6 +4275,23 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->lane_count = ((temp & DDI_PORT_WIDTH_MASK) >> DDI_PORT_WIDTH_SHIFT) + 1; intel_dp_get_m_n(intel_crtc, pipe_config); + + if (INTEL_GEN(dev_priv) >= 11) { + i915_reg_t dp_tp_ctl; + + if (IS_GEN(dev_priv, 11)) + dp_tp_ctl = DP_TP_CTL(encoder->port); + else + dp_tp_ctl = TGL_DP_TP_CTL(pipe_config->cpu_transcoder); + + pipe_config->fec_enable = + I915_READ(dp_tp_ctl) & DP_TP_CTL_FEC_ENABLE; + + DRM_DEBUG_KMS("[ENCODER:%d:%s] Fec status: %u\n", + encoder->base.base.id, encoder->base.name, + pipe_config->fec_enable); + } + break; case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); @@ -3902,6 +4303,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, break; } + if (encoder->type == INTEL_OUTPUT_EDP) + tgl_dc3co_exitline_get_config(pipe_config); + pipe_config->has_audio = intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); @@ -3979,10 +4383,13 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, if (HAS_TRANSCODER_EDP(dev_priv) && port == PORT_A) pipe_config->cpu_transcoder = TRANSCODER_EDP; - if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) + if (intel_crtc_has_type(pipe_config, INTEL_OUTPUT_HDMI)) { ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state); - else + } else { ret = intel_dp_compute_config(encoder, pipe_config, conn_state); + tgl_dc3co_exitline_compute_config(encoder, pipe_config); + } + if (ret) return ret; @@ -4351,46 +4758,9 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->update_complete = intel_ddi_update_complete; } - switch (port) { - case PORT_A: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_A_IO; - break; - case PORT_B: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_B_IO; - break; - case PORT_C: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_C_IO; - break; - case PORT_D: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_D_IO; - break; - case PORT_E: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_E_IO; - break; - case PORT_F: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_F_IO; - break; - case PORT_G: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_G_IO; - break; - case PORT_H: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_H_IO; - break; - case PORT_I: - intel_dig_port->ddi_io_power_domain = - POWER_DOMAIN_PORT_DDI_I_IO; - break; - default: - MISSING_CASE(port); - } + WARN_ON(port > PORT_I); + intel_dig_port->ddi_io_power_domain = POWER_DOMAIN_PORT_DDI_A_IO + + port - PORT_A; if (init_dp) { if (!intel_ddi_init_dp_connector(intel_dig_port)) diff --git 
a/drivers/gpu/drm/i915/display/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h index a08365da2643..19aeab1246ee 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.h +++ b/drivers/gpu/drm/i915/display/intel_ddi.h @@ -30,7 +30,8 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); +void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); void intel_ddi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b51d1ceb8739..2912abd85148 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -31,7 +31,6 @@ #include <linux/module.h> #include <linux/dma-resv.h> #include <linux/slab.h> -#include <linux/vgaarb.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -79,6 +78,7 @@ #include "intel_sideband.h" #include "intel_sprite.h" #include "intel_tc.h" +#include "intel_vga.h" /* Primary plane formats for gen <= 3 */ static const u32 i8xx_primary_formats[] = { @@ -135,8 +135,6 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); -static void intel_begin_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); -static void intel_finish_crtc_commit(struct intel_atomic_state *, struct intel_crtc *); static void intel_crtc_init_scalers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); static void skylake_pfit_enable(const struct intel_crtc_state *crtc_state); @@ -490,7 +488,7 @@ static const struct intel_limit intel_limits_bxt = { /* WA Display #0827: Gen9:all */ static void -skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) +skl_wa_827(struct drm_i915_private *dev_priv, enum pipe pipe, bool enable) { if (enable) I915_WRITE(CLKGATE_DIS_PSL(pipe), @@ -521,6 +519,20 @@ needs_modeset(const struct intel_crtc_state *state) return drm_atomic_crtc_needs_modeset(&state->base); } +bool +is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state) +{ + return (crtc_state->master_transcoder != INVALID_TRANSCODER || + crtc_state->sync_mode_slaves_mask); +} + +static bool +is_trans_port_sync_master(const struct intel_crtc_state *crtc_state) +{ + return (crtc_state->master_transcoder == INVALID_TRANSCODER && + crtc_state->sync_mode_slaves_mask); +} + /* * Platform specific helpers to calculate the port PLL loopback- (clock.m), * and post-divider (clock.p) values, pre- (clock.vco) and post-divided fast @@ -1612,8 +1624,8 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, if (intel_de_wait_for_register(dev_priv, dpll_reg, port_mask, expected_mask, 1000)) - WARN(1, "timed out waiting for port %c ready: got 0x%x, expected 0x%x\n", - port_name(dport->base.port), + WARN(1, "timed out waiting for [ENCODER:%d:%s] port ready: got 0x%x, expected 0x%x\n", + dport->base.base.base.id, dport->base.base.name, I915_READ(dpll_reg) & port_mask, 
expected_mask); } @@ -2079,7 +2091,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int pinctl; u32 alignment; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) + return ERR_PTR(-EINVAL); alignment = intel_surf_alignment(fb, 0); @@ -2161,8 +2174,6 @@ err: void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); @@ -2736,10 +2747,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, size++; /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; + drm_rect_init(&r, x, y, width, height); drm_rect_rotate(&r, rot_info->plane[i].width * tile_width, rot_info->plane[i].height * tile_height, @@ -2861,10 +2869,7 @@ intel_plane_remap_gtt(struct intel_plane_state *plane_state) struct drm_rect r; /* rotate the x/y offsets to match the GTT view */ - r.x1 = x; - r.y1 = y; - r.x2 = x + width; - r.y2 = y + height; + drm_rect_init(&r, x, y, width, height); drm_rect_rotate(&r, info->plane[i].width * tile_width, info->plane[i].height * tile_height, @@ -3063,13 +3068,11 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - mutex_unlock(&dev->struct_mutex); - if (!obj) + if (IS_ERR(obj)) return false; switch (plane_config->tiling) { @@ -3230,13 +3233,11 @@ valid_fb: intel_state->color_plane[0].stride = intel_fb_pitch(fb, 0, intel_state->base.rotation); - mutex_lock(&dev->struct_mutex); intel_state->vma = intel_pin_and_fence_fb_obj(fb, &intel_state->view, intel_plane_uses_fence(intel_state), &intel_state->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", intel_crtc->pipe, PTR_ERR(intel_state->vma)); @@ -3280,7 +3281,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - return 4096; + /* + * Validated limit is 4k, but has 5k should + * work apart from the following features: + * - Ytile (already limited to 4k) + * - FP16 (already limited to 4k) + * - render compression (already limited to 4k) + * - KVMR sprite and cursor (don't care) + * - horizontal panning (TODO verify this) + * - pipe and plane scaling (TODO verify this) + */ + if (cpp == 8) + return 4096; + else + return 5120; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? 
*/ @@ -3331,6 +3345,16 @@ static int icl_max_plane_width(const struct drm_framebuffer *fb, return 5120; } +static int skl_max_plane_height(void) +{ + return 4096; +} + +static int icl_max_plane_height(void) +{ + return 4320; +} + static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, int main_x, int main_y, u32 main_offset) { @@ -3379,7 +3403,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; int max_width; - int max_height = 4096; + int max_height; u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; if (INTEL_GEN(dev_priv) >= 11) @@ -3389,6 +3413,11 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) else max_width = skl_max_plane_width(fb, 0, rotation); + if (INTEL_GEN(dev_priv) >= 11) + max_height = icl_max_plane_height(); + else + max_height = skl_max_plane_height(); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); @@ -3455,9 +3484,8 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. */ - drm_rect_translate(&plane_state->base.src, - (x << 16) - plane_state->base.src.x1, - (y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->base.src, + x << 16, y << 16); return 0; } @@ -3528,7 +3556,7 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) * Handle the AUX surface first since * the main surface setup depends on it. */ - if (is_planar_yuv_format(fb->format->format)) { + if (drm_format_info_is_yuv_semiplanar(fb->format)) { ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -3679,9 +3707,8 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) * Put the final coordinates back so that the src * coordinate checks will see the right values. */ - drm_rect_translate(&plane_state->base.src, - (src_x << 16) - plane_state->base.src.x1, - (src_y << 16) - plane_state->base.src.y1); + drm_rect_translate_to(&plane_state->base.src, + src_x << 16, src_y << 16); /* HSW/BDW do this automagically in hardware */ if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { @@ -4211,7 +4238,7 @@ __intel_display_resume(struct drm_device *dev, int i, ret; intel_modeset_setup_hw_state(dev, ctx); - i915_redisable_vga(to_i915(dev)); + intel_vga_redisable(to_i915(dev)); if (!state) return 0; @@ -4243,7 +4270,7 @@ __intel_display_resume(struct drm_device *dev, static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) { return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display && - intel_has_gpu_reset(dev_priv)); + intel_has_gpu_reset(&dev_priv->gt)); } void intel_prepare_reset(struct drm_i915_private *dev_priv) @@ -4330,7 +4357,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * so need a full re-initialization. 
*/ intel_pps_unlock_regs_wa(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -4378,50 +4405,60 @@ static void icl_set_pipe_chicken(struct intel_crtc *crtc) I915_WRITE(PIPE_CHICKEN(pipe), tmp); } -static void intel_update_pipe_config(const struct intel_crtc_state *old_crtc_state, - const struct intel_crtc_state *new_crtc_state) +static void icl_enable_trans_port_sync(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ - crtc->base.mode = new_crtc_state->base.mode; + u32 trans_ddi_func_ctl2_val; + u8 master_select; /* - * Update pipe size and adjust fitter if needed: the reason for this is - * that in compute_mode_changes we check the native mode (not the pfit - * mode) to see if we can flip rather than do a full mode set. In the - * fastboot case, we'll flip, but if we don't update the pipesrc and - * pfit state, we'll end up with a big fb scanned out into the wrong - * sized surface. + * Configure the master select and enable Transcoder Port Sync for + * Slave CRTCs transcoder. */ + if (crtc_state->master_transcoder == INVALID_TRANSCODER) + return; - I915_WRITE(PIPESRC(crtc->pipe), - ((new_crtc_state->pipe_src_w - 1) << 16) | - (new_crtc_state->pipe_src_h - 1)); + if (crtc_state->master_transcoder == TRANSCODER_EDP) + master_select = 0; + else + master_select = crtc_state->master_transcoder + 1; - /* on skylake this is done by detaching scalers */ - if (INTEL_GEN(dev_priv) >= 9) { - skl_detach_scalers(new_crtc_state); + /* Set the master select bits for Transcoder Port Sync */ + trans_ddi_func_ctl2_val = (PORT_SYNC_MODE_MASTER_SELECT(master_select) & + PORT_SYNC_MODE_MASTER_SELECT_MASK) << + PORT_SYNC_MODE_MASTER_SELECT_SHIFT; + /* Enable Transcoder Port Sync */ + trans_ddi_func_ctl2_val |= PORT_SYNC_MODE_ENABLE; - if (new_crtc_state->pch_pfit.enabled) - skylake_pfit_enable(new_crtc_state); - } else if (HAS_PCH_SPLIT(dev_priv)) { - if (new_crtc_state->pch_pfit.enabled) - ironlake_pfit_enable(new_crtc_state); - else if (old_crtc_state->pch_pfit.enabled) - ironlake_pfit_disable(old_crtc_state); - } + I915_WRITE(TRANS_DDI_FUNC_CTL2(crtc_state->cpu_transcoder), + trans_ddi_func_ctl2_val); +} - if (INTEL_GEN(dev_priv) >= 11) - icl_set_pipe_chicken(crtc); +static void icl_disable_transcoder_port_sync(const struct intel_crtc_state *old_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + i915_reg_t reg; + u32 trans_ddi_func_ctl2_val; + + if (old_crtc_state->master_transcoder == INVALID_TRANSCODER) + return; + + DRM_DEBUG_KMS("Disabling Transcoder Port Sync on Slave Transcoder %s\n", + transcoder_name(old_crtc_state->cpu_transcoder)); + + reg = TRANS_DDI_FUNC_CTL2(old_crtc_state->cpu_transcoder); + trans_ddi_func_ctl2_val = ~(PORT_SYNC_MODE_ENABLE | + PORT_SYNC_MODE_MASTER_SELECT_MASK); + I915_WRITE(reg, trans_ddi_func_ctl2_val); } static void intel_fdi_normal_train(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -4464,7 +4501,7 @@ static void ironlake_fdi_link_train(struct intel_crtc
*crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -4565,7 +4602,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -4698,7 +4735,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -4816,7 +4853,7 @@ static void ironlake_fdi_pll_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4853,7 +4890,7 @@ static void ironlake_fdi_pll_disable(struct intel_crtc *intel_crtc) { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -4884,7 +4921,7 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; i915_reg_t reg; u32 temp; @@ -5199,7 +5236,7 @@ static void ironlake_pch_enable(const struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); @@ -5294,7 +5331,7 @@ static void lpt_pch_enable(const struct intel_atomic_state *state, lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } -static void cpt_verify_modeset(struct drm_device *dev, int pipe) +static void cpt_verify_modeset(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t dslreg = PIPEDSL(pipe); @@ -5446,7 +5483,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } - if (format && is_planar_yuv_format(format->format) && + if (format && drm_format_info_is_yuv_semiplanar(format) && (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { DRM_DEBUG_KMS("Planar YUV: src dimensions not met\n"); return -EINVAL; @@ -5523,7 +5560,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, /* Pre-gen11 and SDR planes always need a scaler for planar formats. 
*/ if (!icl_is_hdr_plane(dev_priv, intel_plane->id) && - fb && is_planar_yuv_format(fb->format->format)) + fb && drm_format_info_is_yuv_semiplanar(fb->format)) need_scaler = true; ret = skl_update_scaler(crtc_state, force_detach, @@ -5633,7 +5670,7 @@ static void ironlake_pfit_enable(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; if (crtc_state->pch_pfit.enabled) { /* Force use of hard-coded filter coefficients @@ -5715,13 +5752,8 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { - if (intel_crtc->overlay) { - struct drm_device *dev = intel_crtc->base.dev; - - mutex_lock(&dev->struct_mutex); + if (intel_crtc->overlay) (void) intel_overlay_switch_off(intel_crtc->overlay); - mutex_unlock(&dev->struct_mutex); - } /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. @@ -5746,7 +5778,7 @@ intel_post_enable_primary(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Gen2 reports pipe underruns whenever all planes are disabled. @@ -5770,7 +5802,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Gen2 reports pipe underruns whenever all planes are disabled. 
@@ -6293,7 +6325,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) return; @@ -6426,7 +6458,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe, hsw_workaround_pipe; + enum pipe pipe = intel_crtc->pipe, hsw_workaround_pipe; enum transcoder cpu_transcoder = pipe_config->cpu_transcoder; bool psl_clkgate_wa; @@ -6446,6 +6478,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (!transcoder_is_dsi(cpu_transcoder)) intel_set_pipe_timings(pipe_config); + if (INTEL_GEN(dev_priv) >= 11) + icl_enable_trans_port_sync(pipe_config); + intel_set_pipe_src_size(pipe_config); if (cpu_transcoder != TRANSCODER_EDP && @@ -6491,7 +6526,6 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (INTEL_GEN(dev_priv) >= 11) icl_set_pipe_chicken(intel_crtc); - intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_transcoder_func(pipe_config); @@ -6552,7 +6586,7 @@ static void ironlake_crtc_disable(struct intel_crtc_state *old_crtc_state, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * Sometimes spurious CPU pipe underruns happen when the @@ -6624,6 +6658,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(old_crtc_state, false); + if (INTEL_GEN(dev_priv) >= 11) + icl_disable_transcoder_port_sync(old_crtc_state); + if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(old_crtc_state); @@ -6721,6 +6758,8 @@ enum intel_display_power_domain intel_port_to_power_domain(enum port port) return POWER_DOMAIN_PORT_DDI_E_LANES; case PORT_F: return POWER_DOMAIN_PORT_DDI_F_LANES; + case PORT_G: + return POWER_DOMAIN_PORT_DDI_G_LANES; default: MISSING_CASE(port); return POWER_DOMAIN_PORT_OTHER; @@ -6737,16 +6776,18 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) dig_port->tc_mode == TC_PORT_TBT_ALT) { switch (dig_port->aux_ch) { case AUX_CH_C: - return POWER_DOMAIN_AUX_TBT1; + return POWER_DOMAIN_AUX_C_TBT; case AUX_CH_D: - return POWER_DOMAIN_AUX_TBT2; + return POWER_DOMAIN_AUX_D_TBT; case AUX_CH_E: - return POWER_DOMAIN_AUX_TBT3; + return POWER_DOMAIN_AUX_E_TBT; case AUX_CH_F: - return POWER_DOMAIN_AUX_TBT4; + return POWER_DOMAIN_AUX_F_TBT; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G_TBT; default: MISSING_CASE(dig_port->aux_ch); - return POWER_DOMAIN_AUX_TBT1; + return POWER_DOMAIN_AUX_C_TBT; } } @@ -6763,6 +6804,8 @@ intel_aux_power_domain(struct intel_digital_port *dig_port) return POWER_DOMAIN_AUX_E; case AUX_CH_F: return POWER_DOMAIN_AUX_F; + case AUX_CH_G: + return POWER_DOMAIN_AUX_G; default: MISSING_CASE(dig_port->aux_ch); return POWER_DOMAIN_AUX_A; @@ -6839,7 +6882,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc 
*intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; if (WARN_ON(intel_crtc->active)) return; @@ -6971,7 +7014,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; /* * On gen2 planes are double buffered but the pipe isn't, so we must @@ -7080,7 +7123,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, intel_display_power_put_unchecked(dev_priv, domain); intel_crtc->enabled_power_domains = 0; - dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe); + dev_priv->active_pipes &= ~BIT(intel_crtc->pipe); dev_priv->min_cdclk[intel_crtc->pipe] = 0; dev_priv->min_voltage_level[intel_crtc->pipe] = 0; @@ -7188,7 +7231,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } - if (INTEL_INFO(dev_priv)->num_pipes == 2) + if (INTEL_NUM_PIPES(dev_priv) == 2) return 0; /* Ivybridge 3 pipe is really complicated */ @@ -7261,7 +7304,7 @@ retry: pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n, false); + link_bw, &pipe_config->fdi_m_n, false, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EDEADLK) @@ -7508,11 +7551,15 @@ void intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n) + bool constant_n, bool fec_enable) { - m_n->tu = 64; + u32 data_clock = bits_per_pixel * pixel_clock; + + if (fec_enable) + data_clock = intel_dp_mode_to_fec_clock(data_clock); - compute_m_n(bits_per_pixel * pixel_clock, + m_n->tu = 64; + compute_m_n(data_clock, link_clock * nlanes * 8, &m_n->gmch_m, &m_n->gmch_n, constant_n); @@ -7522,6 +7569,27 @@ intel_link_compute_m_n(u16 bits_per_pixel, int nlanes, constant_n); } +static void intel_panel_sanitize_ssc(struct drm_i915_private *dev_priv) +{ + /* + * There may be no VBT; and if the BIOS enabled SSC we can + * just keep using it to avoid unnecessary flicker. Whereas if the + * BIOS isn't using it, don't assume it will work even if the VBT + * indicates as much. 
+ */ + if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { + bool bios_lvds_use_ssc = I915_READ(PCH_DREF_CONTROL) & + DREF_SSC1_ENABLE; + + if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { + DRM_DEBUG_KMS("SSC %s by BIOS, overriding VBT which says %s\n", + enableddisabled(bios_lvds_use_ssc), + enableddisabled(dev_priv->vbt.lvds_use_ssc)); + dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; + } + } +} + static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) { if (i915_modparams.panel_use_ssc >= 0) @@ -8173,6 +8241,21 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state) (crtc_state->pipe_src_h - 1)); } +static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; + + if (IS_GEN(dev_priv, 2)) + return false; + + if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK_HSW; + else + return I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK; +} + static void intel_get_pipe_timings(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -8211,7 +8294,7 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1; - if (I915_READ(PIPECONF(cpu_transcoder)) & PIPECONF_INTERLACE_MASK) { + if (intel_pipe_is_interlaced(pipe_config)) { pipe_config->base.adjusted_mode.flags |= DRM_MODE_FLAG_INTERLACE; pipe_config->base.adjusted_mode.crtc_vtotal += 1; pipe_config->base.adjusted_mode.crtc_vblank_end += 1; @@ -8543,7 +8626,7 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; struct dpll clock; u32 mdiv; int refclk = 100000; @@ -8653,7 +8736,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; enum dpio_channel port = vlv_pipe_to_channel(pipe); struct dpll clock; u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; @@ -8682,47 +8765,24 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pipe_config->port_clock = chv_calc_dpll_params(refclk, &clock); } -static void intel_get_crtc_ycbcr_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) +static enum intel_output_format +bdw_get_pipemisc_output_format(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum intel_output_format output = INTEL_OUTPUT_FORMAT_RGB; - - pipe_config->lspcon_downsampling = false; + u32 tmp; - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { - u32 tmp = I915_READ(PIPEMISC(crtc->pipe)); + tmp = I915_READ(PIPEMISC(crtc->pipe)); - if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { - bool ycbcr420_enabled = tmp & PIPEMISC_YUV420_ENABLE; - bool blend = tmp & PIPEMISC_YUV420_MODE_FULL_BLEND; + if (tmp & PIPEMISC_YUV420_ENABLE) { + /* We support 4:2:0 in full blend mode only */ + WARN_ON((tmp & PIPEMISC_YUV420_MODE_FULL_BLEND) == 0); - if (ycbcr420_enabled) { - /* We support 4:2:0 in full blend mode only */ - if (!blend) - output = INTEL_OUTPUT_FORMAT_INVALID; - 
else if (!(IS_GEMINILAKE(dev_priv) || - INTEL_GEN(dev_priv) >= 10)) - output = INTEL_OUTPUT_FORMAT_INVALID; - else - output = INTEL_OUTPUT_FORMAT_YCBCR420; - } else { - /* - * Currently there is no interface defined to - * check user preference between RGB/YCBCR444 - * or YCBCR420. So the only possible case for - * YCBCR444 usage is driving YCBCR420 output - * with LSPCON, when pipe is configured for - * YCBCR444 output and LSPCON takes care of - * downsampling it. - */ - pipe_config->lspcon_downsampling = true; - output = INTEL_OUTPUT_FORMAT_YCBCR444; - } - } + return INTEL_OUTPUT_FORMAT_YCBCR420; + } else if (tmp & PIPEMISC_OUTPUT_COLORSPACE_YUV) { + return INTEL_OUTPUT_FORMAT_YCBCR444; + } else { + return INTEL_OUTPUT_FORMAT_RGB; } - - pipe_config->output_format = output; } static void i9xx_get_pipe_color_config(struct intel_crtc_state *crtc_state) @@ -8760,6 +8820,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; @@ -9398,9 +9459,19 @@ static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state) else val |= PIPECONF_PROGRESSIVE; + /* + * This would end up with an odd purple hue over + * the entire display. Make sure we don't do it. + */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + if (crtc_state->limited_color_range) val |= PIPECONF_COLOR_RANGE_SELECT; + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV709; + val |= PIPECONF_GAMMA_MODE(crtc_state->gamma_mode); I915_WRITE(PIPECONF(pipe), val); @@ -9422,6 +9493,10 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) else val |= PIPECONF_PROGRESSIVE; + if (IS_HASWELL(dev_priv) && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + val |= PIPECONF_OUTPUT_COLORSPACE_YUV_HSW; + I915_WRITE(PIPECONF(cpu_transcoder), val); POSTING_READ(PIPECONF(cpu_transcoder)); } @@ -9572,7 +9647,7 @@ static void ironlake_compute_dpll(struct intel_crtc *crtc, * clear if it's a win or loss power wise. No point in doing * this on ILK at all since it has a fixed DPLL<->pipe mapping.
*/ - if (INTEL_INFO(dev_priv)->num_pipes == 3 && + if (INTEL_NUM_PIPES(dev_priv) == 3 && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_ANALOG)) dpll |= DPLL_SDVO_HIGH_SPEED; @@ -9871,8 +9946,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, offset = I915_READ(PLANE_OFFSET(pipe, plane_id)); val = I915_READ(PLANE_SIZE(pipe, plane_id)); - fb->height = ((val >> 16) & 0xfff) + 1; - fb->width = ((val >> 0) & 0x1fff) + 1; + fb->height = ((val >> 16) & 0xffff) + 1; + fb->width = ((val >> 0) & 0xffff) + 1; val = I915_READ(PLANE_STRIDE(pipe, plane_id)); stride_mult = skl_plane_stride_mult(fb, 0, DRM_MODE_ROTATE_0); @@ -9933,9 +10008,9 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (!wakeref) return false; - pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe; pipe_config->shared_dpll = NULL; + pipe_config->master_transcoder = INVALID_TRANSCODER; ret = false; tmp = I915_READ(PIPECONF(crtc->pipe)); @@ -9962,6 +10037,16 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (tmp & PIPECONF_COLOR_RANGE_SELECT) pipe_config->limited_color_range = true; + switch (tmp & PIPECONF_OUTPUT_COLORSPACE_MASK) { + case PIPECONF_OUTPUT_COLORSPACE_YUV601: + case PIPECONF_OUTPUT_COLORSPACE_YUV709: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + break; + default: + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + break; + } + pipe_config->gamma_mode = (tmp & PIPECONF_GAMMA_MODE_MASK_ILK) >> PIPECONF_GAMMA_MODE_SHIFT; @@ -10376,6 +10461,59 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, } } +static enum transcoder transcoder_master_readout(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder) +{ + u32 trans_port_sync, master_select; + + trans_port_sync = I915_READ(TRANS_DDI_FUNC_CTL2(cpu_transcoder)); + + if ((trans_port_sync & PORT_SYNC_MODE_ENABLE) == 0) + return INVALID_TRANSCODER; + + master_select = trans_port_sync & + PORT_SYNC_MODE_MASTER_SELECT_MASK; + if (master_select == 0) + return TRANSCODER_EDP; + else + return master_select - 1; +} + +static void icelake_get_trans_port_sync_config(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + u32 transcoders; + enum transcoder cpu_transcoder; + + crtc_state->master_transcoder = transcoder_master_readout(dev_priv, + crtc_state->cpu_transcoder); + + transcoders = BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C) | + BIT(TRANSCODER_D); + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + enum intel_display_power_domain power_domain; + intel_wakeref_t trans_wakeref; + + power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); + trans_wakeref = intel_display_power_get_if_enabled(dev_priv, + power_domain); + + if (!trans_wakeref) + continue; + + if (transcoder_master_readout(dev_priv, cpu_transcoder) == + crtc_state->cpu_transcoder) + crtc_state->sync_mode_slaves_mask |= BIT(cpu_transcoder); + + intel_display_power_put(dev_priv, power_domain, trans_wakeref); + } + + WARN_ON(crtc_state->master_transcoder != INVALID_TRANSCODER && + crtc_state->sync_mode_slaves_mask); +} + static bool haswell_get_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -10387,6 +10525,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, intel_crtc_init_scalers(crtc, pipe_config); + pipe_config->master_transcoder = INVALID_TRANSCODER; + power_domain = POWER_DOMAIN_PIPE(crtc->pipe); wf = 
intel_display_power_get_if_enabled(dev_priv, power_domain); if (!wf) @@ -10417,7 +10557,30 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, } intel_get_pipe_src_size(crtc, pipe_config); - intel_get_crtc_ycbcr_config(crtc, pipe_config); + + if (IS_HASWELL(dev_priv)) { + u32 tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); + + if (tmp & PIPECONF_OUTPUT_COLORSPACE_YUV_HSW) + pipe_config->output_format = INTEL_OUTPUT_FORMAT_YCBCR444; + else + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + } else { + pipe_config->output_format = + bdw_get_pipemisc_output_format(crtc); + + /* + * Currently there is no interface defined to + * check user preference between RGB/YCBCR444 + * or YCBCR420. So the only possible case for + * YCBCR444 usage is driving YCBCR420 output + * with LSPCON, when pipe is configured for + * YCBCR444 output and LSPCON takes care of + * downsampling it. + */ + pipe_config->lspcon_downsampling = + pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR444; + } pipe_config->gamma_mode = I915_READ(GAMMA_MODE(crtc->pipe)); @@ -10472,6 +10635,10 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, pipe_config->pixel_multiplier = 1; } + if (INTEL_GEN(dev_priv) >= 11 && + !transcoder_is_dsi(pipe_config->cpu_transcoder)) + icelake_get_trans_port_sync_config(pipe_config); + out: for_each_power_domain(power_domain, power_domain_mask) intel_display_power_put(dev_priv, @@ -10493,21 +10660,13 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) else base = intel_plane_ggtt_offset(plane_state); - base += plane_state->color_plane[0].offset; - - /* ILK+ do this automagically */ - if (HAS_GMCH(dev_priv) && - plane_state->base.rotation & DRM_MODE_ROTATE_180) - base += (plane_state->base.crtc_h * - plane_state->base.crtc_w - 1) * fb->format->cpp[0]; - - return base; + return base + plane_state->color_plane[0].offset; } static u32 intel_cursor_position(const struct intel_plane_state *plane_state) { - int x = plane_state->base.crtc_x; - int y = plane_state->base.crtc_y; + int x = plane_state->base.dst.x1; + int y = plane_state->base.dst.y1; u32 pos = 0; if (x < 0) { @@ -10529,8 +10688,8 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) { const struct drm_mode_config *config = &plane_state->base.plane->dev->mode_config; - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); return width > 0 && width <= config->cursor_width && height > 0 && height <= config->cursor_height; @@ -10538,6 +10697,9 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) static int intel_cursor_check_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + unsigned int rotation = plane_state->base.rotation; int src_x, src_y; u32 offset; int ret; @@ -10549,8 +10711,8 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) if (!plane_state->base.visible) return 0; - src_x = plane_state->base.src_x >> 16; - src_y = plane_state->base.src_y >> 16; + src_x = plane_state->base.src.x1 >> 16; + src_y = plane_state->base.src.y1 >> 16; intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); offset = intel_plane_compute_aligned_offset(&src_x, &src_y, @@ -10561,7 +10723,25 @@ static int intel_cursor_check_surface(struct intel_plane_state *plane_state) return -EINVAL; } + /* + * Put the final 
coordinates back so that the src + * coordinate checks will see the right values. + */ + drm_rect_translate_to(&plane_state->base.src, + src_x << 16, src_y << 16); + + /* ILK+ do this automagically in hardware */ + if (HAS_GMCH(dev_priv) && rotation & DRM_MODE_ROTATE_180) { + const struct drm_framebuffer *fb = plane_state->base.fb; + int src_w = drm_rect_width(&plane_state->base.src) >> 16; + int src_h = drm_rect_height(&plane_state->base.src) >> 16; + + offset += (src_h * src_w - 1) * fb->format->cpp[0]; + } + plane_state->color_plane[0].offset = offset; + plane_state->color_plane[0].x = src_x; + plane_state->color_plane[0].y = src_y; return 0; } @@ -10585,6 +10765,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* Use the unclipped src/dst rectangles, which we program to hw */ + plane_state->base.src = drm_plane_state_src(&plane_state->base); + plane_state->base.dst = drm_plane_state_dest(&plane_state->base); + ret = intel_cursor_check_surface(plane_state); if (ret) return ret; @@ -10627,7 +10811,7 @@ static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state, static bool i845_cursor_size_ok(const struct intel_plane_state *plane_state) { - int width = plane_state->base.crtc_w; + int width = drm_rect_width(&plane_state->base.dst); /* * 845g/865g are only limited by the width of their cursors, @@ -10653,8 +10837,8 @@ static int i845_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i845_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + drm_rect_height(&plane_state->base.dst)); return -EINVAL; } @@ -10687,8 +10871,8 @@ static void i845_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { - unsigned int width = plane_state->base.crtc_w; - unsigned int height = plane_state->base.crtc_h; + unsigned int width = drm_rect_width(&plane_state->base.src); + unsigned int height = drm_rect_height(&plane_state->base.dst); cntl = plane_state->ctl | i845_cursor_ctl_crtc(crtc_state); @@ -10790,7 +10974,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, if (IS_GEN(dev_priv, 6) || IS_IVYBRIDGE(dev_priv)) cntl |= MCURSOR_TRICKLE_FEED_DISABLE; - switch (plane_state->base.crtc_w) { + switch (drm_rect_width(&plane_state->base.dst)) { case 64: cntl |= MCURSOR_MODE_64_ARGB_AX; break; @@ -10801,7 +10985,7 @@ static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, cntl |= MCURSOR_MODE_256_ARGB_AX; break; default: - MISSING_CASE(plane_state->base.crtc_w); + MISSING_CASE(drm_rect_width(&plane_state->base.dst)); return 0; } @@ -10815,8 +10999,8 @@ static bool i9xx_cursor_size_ok(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - int width = plane_state->base.crtc_w; - int height = plane_state->base.crtc_h; + int width = drm_rect_width(&plane_state->base.dst); + int height = drm_rect_height(&plane_state->base.dst); if (!intel_cursor_size_ok(plane_state)) return false; @@ -10869,17 +11053,19 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, /* Check for which cursor types we support */ if (!i9xx_cursor_size_ok(plane_state)) { DRM_DEBUG("Cursor dimension %dx%d not supported\n", - plane_state->base.crtc_w, - plane_state->base.crtc_h); + drm_rect_width(&plane_state->base.dst), + 
drm_rect_height(&plane_state->base.dst)); return -EINVAL; } WARN_ON(plane_state->base.visible && plane_state->color_plane[0].stride != fb->pitches[0]); - if (fb->pitches[0] != plane_state->base.crtc_w * fb->format->cpp[0]) { + if (fb->pitches[0] != + drm_rect_width(&plane_state->base.dst) * fb->format->cpp[0]) { DRM_DEBUG_KMS("Invalid cursor stride (%u) (cursor width %d)\n", - fb->pitches[0], plane_state->base.crtc_w); + fb->pitches[0], + drm_rect_width(&plane_state->base.dst)); return -EINVAL; } @@ -10894,7 +11080,7 @@ static int i9xx_check_cursor(struct intel_crtc_state *crtc_state, * Refuse the put the cursor into that compromised position. */ if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C && - plane_state->base.visible && plane_state->base.crtc_x < 0) { + plane_state->base.visible && plane_state->base.dst.x1 < 0) { DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n"); return -EINVAL; } @@ -10914,11 +11100,14 @@ static void i9xx_update_cursor(struct intel_plane *plane, unsigned long irqflags; if (plane_state && plane_state->base.visible) { + unsigned width = drm_rect_width(&plane_state->base.dst); + unsigned height = drm_rect_height(&plane_state->base.dst); + cntl = plane_state->ctl | i9xx_cursor_ctl_crtc(crtc_state); - if (plane_state->base.crtc_h != plane_state->base.crtc_w) - fbc_ctl = CUR_FBC_CTL_EN | (plane_state->base.crtc_h - 1); + if (width != height) + fbc_ctl = CUR_FBC_CTL_EN | (height - 1); base = intel_cursor_base(plane_state); pos = intel_cursor_position(plane_state); @@ -11265,7 +11454,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - int pipe = pipe_config->cpu_transcoder; + enum pipe pipe = crtc->pipe; u32 dpll = pipe_config->dpll_hw_state.dpll; u32 fp; struct dpll clock; @@ -11489,7 +11678,6 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat bool was_crtc_enabled = old_crtc_state->base.active; bool is_crtc_enabled = crtc_state->base.active; bool turn_off, turn_on, visible, was_visible; - struct drm_framebuffer *fb = plane_state->base.fb; int ret; if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { @@ -11523,19 +11711,12 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat if (!was_visible && !visible) return 0; - if (fb != old_plane_state->base.fb) - crtc_state->fb_changed = true; - turn_off = was_visible && (!visible || mode_changed); turn_on = visible && (!was_visible || mode_changed); - DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", crtc->base.base.id, crtc->base.name, plane->base.base.id, plane->base.name, - fb ? 
fb->base.id : -1); - - DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", - plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); @@ -11644,7 +11825,7 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) int i; for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - linked = plane_state->linked_plane; + linked = plane_state->planar_linked_plane; if (!linked) continue; @@ -11653,8 +11834,8 @@ static int icl_add_linked_planes(struct intel_atomic_state *state) if (IS_ERR(linked_plane_state)) return PTR_ERR(linked_plane_state); - WARN_ON(linked_plane_state->linked_plane != plane); - WARN_ON(linked_plane_state->slave == plane_state->slave); + WARN_ON(linked_plane_state->planar_linked_plane != plane); + WARN_ON(linked_plane_state->planar_slave == plane_state->planar_slave); } return 0; @@ -11677,16 +11858,16 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) * in the crtc_state->active_planes mask. */ for_each_new_intel_plane_in_state(state, plane, plane_state, i) { - if (plane->pipe != crtc->pipe || !plane_state->linked_plane) + if (plane->pipe != crtc->pipe || !plane_state->planar_linked_plane) continue; - plane_state->linked_plane = NULL; - if (plane_state->slave && !plane_state->base.visible) { + plane_state->planar_linked_plane = NULL; + if (plane_state->planar_slave && !plane_state->base.visible) { crtc_state->active_planes &= ~BIT(plane->id); crtc_state->update_planes |= BIT(plane->id); } - plane_state->slave = false; + plane_state->planar_slave = false; } if (!crtc_state->nv12_planes) @@ -11720,10 +11901,10 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state) return -EINVAL; } - plane_state->linked_plane = linked; + plane_state->planar_linked_plane = linked; - linked_state->slave = true; - linked_state->linked_plane = plane; + linked_state->planar_slave = true; + linked_state->planar_linked_plane = plane; crtc_state->active_planes |= BIT(linked->id); crtc_state->update_planes |= BIT(linked->id); DRM_DEBUG_KMS("Using %s as Y plane for %s\n", linked->base.name, plane->base.name); @@ -11743,25 +11924,108 @@ static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state) return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes; } -static int intel_crtc_atomic_check(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state) +static int icl_add_sync_mode_crtcs(struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *pipe_config = - to_intel_crtc_state(crtc_state); + struct drm_crtc *crtc = crtc_state->base.crtc; + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state); + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct drm_connector *master_connector, *connector; + struct drm_connector_state *connector_state; + struct drm_connector_list_iter conn_iter; + struct drm_crtc *master_crtc = NULL; + struct drm_crtc_state *master_crtc_state; + struct intel_crtc_state *master_pipe_config; + int i, tile_group_id; + + if (INTEL_GEN(dev_priv) < 11) + return 0; + + /* + * In case of tiled displays there could be one or more slaves but there is + * only one master. Lets make the CRTC used by the connector corresponding + * to the last horizonal and last vertical tile a master/genlock CRTC. 
+ * All the other CRTCs corresponding to other tiles of the same Tile group + * are the slave CRTCs and hold a pointer to their genlock CRTC. + */ + for_each_new_connector_in_state(&state->base, connector, connector_state, i) { + if (connector_state->crtc != crtc) + continue; + if (!connector->has_tile) + continue; + if (crtc_state->base.mode.hdisplay != connector->tile_h_size || + crtc_state->base.mode.vdisplay != connector->tile_v_size) + return 0; + if (connector->tile_h_loc == connector->num_h_tile - 1 && + connector->tile_v_loc == connector->num_v_tile - 1) + continue; + crtc_state->sync_mode_slaves_mask = 0; + tile_group_id = connector->tile_group->id; + drm_connector_list_iter_begin(&dev_priv->drm, &conn_iter); + drm_for_each_connector_iter(master_connector, &conn_iter) { + struct drm_connector_state *master_conn_state = NULL; + + if (!master_connector->has_tile) + continue; + if (master_connector->tile_h_loc != master_connector->num_h_tile - 1 || + master_connector->tile_v_loc != master_connector->num_v_tile - 1) + continue; + if (master_connector->tile_group->id != tile_group_id) + continue; + + master_conn_state = drm_atomic_get_connector_state(&state->base, + master_connector); + if (IS_ERR(master_conn_state)) { + drm_connector_list_iter_end(&conn_iter); + return PTR_ERR(master_conn_state); + } + if (master_conn_state->crtc) { + master_crtc = master_conn_state->crtc; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (!master_crtc) { + DRM_DEBUG_KMS("Could not find Master CRTC for Slave CRTC %d\n", + connector_state->crtc->base.id); + return -EINVAL; + } + + master_crtc_state = drm_atomic_get_crtc_state(&state->base, + master_crtc); + if (IS_ERR(master_crtc_state)) + return PTR_ERR(master_crtc_state); + + master_pipe_config = to_intel_crtc_state(master_crtc_state); + crtc_state->master_transcoder = master_pipe_config->cpu_transcoder; + master_pipe_config->sync_mode_slaves_mask |= + BIT(crtc_state->cpu_transcoder); + DRM_DEBUG_KMS("Master Transcoder = %s added for Slave CRTC = %d, slave transcoder bitmask = %d\n", + transcoder_name(crtc_state->master_transcoder), + crtc_state->base.crtc->base.id, + master_pipe_config->sync_mode_slaves_mask); + } + + return 0; +} + +static int intel_crtc_atomic_check(struct intel_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + bool mode_changed = needs_modeset(crtc_state); int ret; - bool mode_changed = needs_modeset(pipe_config); if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv) && - mode_changed && !crtc_state->active) - pipe_config->update_wm_post = true; + mode_changed && !crtc_state->base.active) + crtc_state->update_wm_post = true; - if (mode_changed && crtc_state->enable && + if (mode_changed && crtc_state->base.enable && dev_priv->display.crtc_compute_clock && - !WARN_ON(pipe_config->shared_dpll)) { - ret = dev_priv->display.crtc_compute_clock(intel_crtc, - pipe_config); + !WARN_ON(crtc_state->shared_dpll)) { + ret = dev_priv->display.crtc_compute_clock(crtc, crtc_state); if (ret) return ret; } @@ -11770,19 +12034,19 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * May need to update pipe gamma enable bits * when C8 planes are getting enabled/disabled. 
*/ - if (c8_planes_changed(pipe_config)) - crtc_state->color_mgmt_changed = true; + if (c8_planes_changed(crtc_state)) + crtc_state->base.color_mgmt_changed = true; - if (mode_changed || pipe_config->update_pipe || - crtc_state->color_mgmt_changed) { - ret = intel_color_check(pipe_config); + if (mode_changed || crtc_state->update_pipe || + crtc_state->base.color_mgmt_changed) { + ret = intel_color_check(crtc_state); if (ret) return ret; } ret = 0; if (dev_priv->display.compute_pipe_wm) { - ret = dev_priv->display.compute_pipe_wm(pipe_config); + ret = dev_priv->display.compute_pipe_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("Target pipe watermarks are invalid\n"); return ret; @@ -11798,7 +12062,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, * old state and the new state. We can program these * immediately. */ - ret = dev_priv->display.compute_intermediate_wm(pipe_config); + ret = dev_priv->display.compute_intermediate_wm(crtc_state); if (ret) { DRM_DEBUG_KMS("No valid intermediate pipe watermarks are possible\n"); return ret; @@ -11806,29 +12070,24 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, } if (INTEL_GEN(dev_priv) >= 9) { - if (mode_changed || pipe_config->update_pipe) - ret = skl_update_scaler_crtc(pipe_config); + if (mode_changed || crtc_state->update_pipe) + ret = skl_update_scaler_crtc(crtc_state); if (!ret) - ret = icl_check_nv12_planes(pipe_config); + ret = icl_check_nv12_planes(crtc_state); if (!ret) - ret = skl_check_pipe_max_pixel_rate(intel_crtc, - pipe_config); + ret = skl_check_pipe_max_pixel_rate(crtc, crtc_state); if (!ret) - ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, - pipe_config); + ret = intel_atomic_setup_scalers(dev_priv, crtc, + crtc_state); } if (HAS_IPS(dev_priv)) - pipe_config->ips_enabled = hsw_compute_ips_config(pipe_config); + crtc_state->ips_enabled = hsw_compute_ips_config(crtc_state); return ret; } -static const struct drm_crtc_helper_funcs intel_helper_funcs = { - .atomic_check = intel_crtc_atomic_check, -}; - static void intel_modeset_update_connector_atomic_state(struct drm_device *dev) { struct intel_connector *connector; @@ -12138,6 +12397,15 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, intel_dpll_dump_hw_state(dev_priv, &pipe_config->dpll_hw_state); + if (IS_CHERRYVIEW(dev_priv)) + DRM_DEBUG_KMS("cgm_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->cgm_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + else + DRM_DEBUG_KMS("csc_mode: 0x%x gamma_mode: 0x%x gamma_enable: %d csc_enable: %d\n", + pipe_config->csc_mode, pipe_config->gamma_mode, + pipe_config->gamma_enable, pipe_config->csc_enable); + dump_planes: if (!state) return; @@ -12239,6 +12507,13 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) saved_state->wm = crtc_state->wm; + /* + * Save the slave bitmask which gets filled for master crtc state during + * slave atomic check call. 
+ */ + if (is_trans_port_sync_master(crtc_state)) + saved_state->sync_mode_slaves_mask = + crtc_state->sync_mode_slaves_mask; /* Keep base drm_crtc_state intact, only clear our extended struct */ BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); @@ -12332,6 +12607,15 @@ encoder_retry: drm_mode_set_crtcinfo(&pipe_config->base.adjusted_mode, CRTC_STEREO_DOUBLE); + /* Set the crtc_state defaults for trans_port_sync */ + pipe_config->master_transcoder = INVALID_TRANSCODER; + ret = icl_add_sync_mode_crtcs(pipe_config); + if (ret) { + DRM_DEBUG_KMS("Cannot assign Sync Mode CRTCs: %d\n", + ret); + return ret; + } + /* Pass our mode to the connectors and the CRTC to give them a chance to * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. @@ -12464,22 +12748,23 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, if ((drm_debug & DRM_UT_KMS) == 0) return; - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s infoframe", name); - drm_dbg(DRM_UT_KMS, "expected:"); + DRM_DEBUG_KMS("fastset mismatch in %s infoframe\n", name); + DRM_DEBUG_KMS("expected:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, a); - drm_dbg(DRM_UT_KMS, "found"); + DRM_DEBUG_KMS("found:\n"); hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, b); } else { - drm_err("mismatch in %s infoframe", name); - drm_err("expected:"); + DRM_ERROR("mismatch in %s infoframe\n", name); + DRM_ERROR("expected:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, a); - drm_err("found"); + DRM_ERROR("found:\n"); hdmi_infoframe_log(KERN_ERR, dev_priv->drm.dev, b); } } -static void __printf(3, 4) -pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) +static void __printf(4, 5) +pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, + const char *name, const char *format, ...) { struct va_format vaf; va_list args; @@ -12489,9 +12774,11 @@ pipe_config_mismatch(bool fastset, const char *name, const char *format, ...) 
vaf.va = &args; if (fastset) - drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf); + DRM_DEBUG_KMS("[CRTC:%d:%s] fastset mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); else - drm_err("mismatch in %s %pV", name, &vaf); + DRM_ERROR("[CRTC:%d:%s] mismatch in %s %pV\n", + crtc->base.base.id, crtc->base.name, name, &vaf); va_end(args); } @@ -12519,7 +12806,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, bool fastset) { struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); bool ret = true; + u32 bp_gamma = 0; bool fixup_inherited = fastset && (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); @@ -12531,8 +12820,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected 0x%08x, found 0x%08x)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected 0x%08x, found 0x%08x)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12541,8 +12830,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12551,8 +12840,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %s, found %s)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12568,8 +12857,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ @@ -12578,8 +12867,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %p, found %p)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %p, found %p)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12590,9 +12879,9 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, if (!intel_compare_link_m_n(&current_config->name, \ &pipe_config->name,\ !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found 
tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12617,10 +12906,10 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, &pipe_config->name, !fastset) && \ !intel_compare_link_m_n(&current_config->alt_name, \ &pipe_config->name, !fastset)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ "(expected tu %i gmch %i/%i link %i/%i, " \ "or tu %i gmch %i/%i link %i/%i, " \ - "found tu %i, gmch %i/%i link %i/%i)\n", \ + "found tu %i, gmch %i/%i link %i/%i)", \ current_config->name.tu, \ current_config->name.gmch_m, \ current_config->name.gmch_n, \ @@ -12642,8 +12931,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(%x) (expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(%x) (expected %i, found %i)", \ (mask), \ current_config->name & (mask), \ pipe_config->name & (mask)); \ @@ -12653,8 +12942,8 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ - pipe_config_mismatch(fastset, __stringify(name), \ - "(expected %i, found %i)\n", \ + pipe_config_mismatch(fastset, crtc, __stringify(name), \ + "(expected %i, found %i)", \ current_config->name, \ pipe_config->name); \ ret = false; \ @@ -12671,6 +12960,24 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) +#define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ + if (current_config->name1 != pipe_config->name1) { \ + pipe_config_mismatch(fastset, crtc, __stringify(name1), \ + "(expected %i, found %i, won't compare lut values)", \ + current_config->name1, \ + pipe_config->name1); \ + ret = false;\ + } else { \ + if (!intel_color_lut_equal(current_config->name2, \ + pipe_config->name2, pipe_config->name1, \ + bit_precision)) { \ + pipe_config_mismatch(fastset, crtc, __stringify(name2), \ + "hw_state doesn't match sw_state"); \ + ret = false; \ + } \ + } \ +} while (0) + #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -12709,6 +13016,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_I(pixel_multiplier); PIPE_CONF_CHECK_I(output_format); + PIPE_CONF_CHECK_I(dc3co_exitline); PIPE_CONF_CHECK_BOOL(has_hdmi_sink); if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -12717,6 +13025,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_BOOL(hdmi_scrambling); PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio); PIPE_CONF_CHECK_BOOL(has_infoframe); + PIPE_CONF_CHECK_BOOL(fec_enable); PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio); @@ -12766,6 +13075,11 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_X(csc_mode); PIPE_CONF_CHECK_BOOL(gamma_enable); PIPE_CONF_CHECK_BOOL(csc_enable); + + bp_gamma = intel_color_get_gamma_bit_precision(pipe_config); + if (bp_gamma) + PIPE_CONF_CHECK_COLOR_LUT(gamma_mode, base.gamma_lut, bp_gamma); + } PIPE_CONF_CHECK_BOOL(double_wide); @@ -12821,6 +13135,9 @@ intel_pipe_config_compare(const struct intel_crtc_state 
*current_config, PIPE_CONF_CHECK_INFOFRAME(hdmi); PIPE_CONF_CHECK_INFOFRAME(drm); + PIPE_CONF_CHECK_I(sync_mode_slaves_mask); + PIPE_CONF_CHECK_I(master_transcoder); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_BOOL @@ -12828,6 +13145,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, #undef PIPE_CONF_CHECK_P #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY +#undef PIPE_CONF_CHECK_COLOR_LUT #undef PIPE_CONF_QUIRK return ret; @@ -13139,7 +13457,7 @@ intel_verify_planes(struct intel_atomic_state *state) for_each_new_intel_plane_in_state(state, plane, plane_state, i) - assert_plane(plane, plane_state->slave || + assert_plane(plane, plane_state->planar_slave || plane_state->base.visible); } @@ -13255,10 +13573,15 @@ intel_modeset_verify_disabled(struct drm_i915_private *dev_priv, verify_disabled_dpll_state(dev_priv); } -static void update_scanline_offset(const struct intel_crtc_state *crtc_state) +static void +intel_crtc_update_active_timings(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + + drm_calc_timestamping_constants(&crtc->base, adjusted_mode); /* * The scanline counter increments at the leading edge of hsync. @@ -13288,7 +13611,6 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) * answer that's slightly in the future. */ if (IS_GEN(dev_priv, 2)) { - const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; int vtotal; vtotal = adjusted_mode->crtc_vtotal; @@ -13299,8 +13621,9 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state) } else if (HAS_DDI(dev_priv) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) { crtc->scanline_offset = 2; - } else + } else { crtc->scanline_offset = 1; + } } static void intel_modeset_clear_plls(struct intel_atomic_state *state) @@ -13382,65 +13705,12 @@ static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state) return 0; } -static int intel_lock_all_pipes(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - /* Add all pipes to the state */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - } - - return 0; -} - -static int intel_modeset_all_pipes(struct intel_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->base.dev); - struct intel_crtc *crtc; - - /* - * Add all pipes to the state, and force - * a modeset on all the active ones. 
- */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - struct intel_crtc_state *crtc_state; - int ret; - - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); - - if (!crtc_state->base.active || needs_modeset(crtc_state)) - continue; - - crtc_state->base.mode_changed = true; - - ret = drm_atomic_add_affected_connectors(&state->base, - &crtc->base); - if (ret) - return ret; - - ret = drm_atomic_add_affected_planes(&state->base, - &crtc->base); - if (ret) - return ret; - } - - return 0; -} - static int intel_modeset_checks(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *old_crtc_state, *new_crtc_state; struct intel_crtc *crtc; - int ret = 0, i; + int ret, i; if (!check_digital_port_conflicts(state)) { DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n"); @@ -13452,7 +13722,7 @@ static int intel_modeset_checks(struct intel_atomic_state *state) state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk; state->modeset = true; - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; state->cdclk.logical = dev_priv->cdclk.logical; state->cdclk.actual = dev_priv->cdclk.actual; state->cdclk.pipe = INVALID_PIPE; @@ -13460,79 +13730,17 @@ static int intel_modeset_checks(struct intel_atomic_state *state) for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (new_crtc_state->base.active) - state->active_crtcs |= 1 << i; + state->active_pipes |= BIT(crtc->pipe); else - state->active_crtcs &= ~(1 << i); + state->active_pipes &= ~BIT(crtc->pipe); if (old_crtc_state->base.active != new_crtc_state->base.active) - state->active_pipe_changes |= drm_crtc_mask(&crtc->base); + state->active_pipe_changes |= BIT(crtc->pipe); } - /* - * See if the config requires any additional preparation, e.g. - * to adjust global state with pipes off. We need to do this - * here so we can get the modeset_pipe updated config for the new - * mode set on this crtc. For other crtcs we need to use the - * adjusted_mode bits in the crtc directly. - */ - if (dev_priv->display.modeset_calc_cdclk) { - enum pipe pipe; - - ret = dev_priv->display.modeset_calc_cdclk(state); - if (ret < 0) - return ret; - - /* - * Writes to dev_priv->cdclk.logical must protected by - * holding all the crtc locks, even if we don't end up - * touching the hardware - */ - if (intel_cdclk_changed(&dev_priv->cdclk.logical, - &state->cdclk.logical)) { - ret = intel_lock_all_pipes(state); - if (ret < 0) - return ret; - } - - if (is_power_of_2(state->active_crtcs)) { - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - - pipe = ilog2(state->active_crtcs); - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = intel_atomic_get_new_crtc_state(state, crtc); - if (crtc_state && needs_modeset(crtc_state)) - pipe = INVALID_PIPE; - } else { - pipe = INVALID_PIPE; - } - - /* All pipes must be switched off while we change the cdclk. 
*/ - if (pipe != INVALID_PIPE && - intel_cdclk_needs_cd2x_update(dev_priv, - &dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_lock_all_pipes(state); - if (ret < 0) - return ret; - - state->cdclk.pipe = pipe; - } else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual, - &state->cdclk.actual)) { - ret = intel_modeset_all_pipes(state); - if (ret < 0) - return ret; - - state->cdclk.pipe = INVALID_PIPE; - } - - DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", - state->cdclk.logical.cdclk, - state->cdclk.actual.cdclk); - DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n", - state->cdclk.logical.voltage_level, - state->cdclk.actual.voltage_level); - } + ret = intel_modeset_calc_cdclk(state); + if (ret) + return ret; intel_modeset_clear_plls(state); @@ -13582,6 +13790,42 @@ static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_sta new_crtc_state->has_drrs = old_crtc_state->has_drrs; } +static int intel_atomic_check_planes(struct intel_atomic_state *state) +{ + struct intel_plane_state *plane_state; + struct intel_plane *plane; + int i, ret; + + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + ret = intel_plane_atomic_check(state, plane); + if (ret) { + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n", + plane->base.base.id, plane->base.name); + return ret; + } + } + + return 0; +} + +static int intel_atomic_check_crtcs(struct intel_atomic_state *state) +{ + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + int i; + + for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { + int ret = intel_crtc_atomic_check(state, crtc); + if (ret) { + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n", + crtc->base.base.id, crtc->base.name); + return ret; + } + } + + return 0; +} + /** * intel_atomic_check - validate state object * @dev: drm device @@ -13645,7 +13889,11 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) goto fail; - ret = drm_atomic_helper_check_planes(dev, &state->base); + ret = intel_atomic_check_planes(state); + if (ret) + goto fail; + + ret = intel_atomic_check_crtcs(state); if (ret) goto fail; @@ -13703,20 +13951,103 @@ u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc) return crtc->base.funcs->get_vblank_counter(&crtc->base); } +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN(dev_priv, 2)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + +static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state, + const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ + crtc->base.mode = new_crtc_state->base.mode; + + /* + * Update pipe size and adjust fitter if needed: the reason for this is + * that in compute_mode_changes we check the native mode (not the pfit + * mode) to see if we can flip rather than do a full mode set. 
In the + * fastboot case, we'll flip, but if we don't update the pipesrc and + * pfit state, we'll end up with a big fb scanned out into the wrong + * sized surface. + */ + intel_set_pipe_src_size(new_crtc_state); + + /* on skylake this is done by detaching scalers */ + if (INTEL_GEN(dev_priv) >= 9) { + skl_detach_scalers(new_crtc_state); + + if (new_crtc_state->pch_pfit.enabled) + skylake_pfit_enable(new_crtc_state); + } else if (HAS_PCH_SPLIT(dev_priv)) { + if (new_crtc_state->pch_pfit.enabled) + ironlake_pfit_enable(new_crtc_state); + else if (old_crtc_state->pch_pfit.enabled) + ironlake_pfit_disable(old_crtc_state); + } + + if (INTEL_GEN(dev_priv) >= 11) + icl_set_pipe_chicken(crtc); +} + +static void commit_pipe_config(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + bool modeset = needs_modeset(new_crtc_state); + + /* + * During modesets pipe configuration was programmed as the + * CRTC was enabled. + */ + if (!modeset) { + if (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe) + intel_color_commit(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9) + skl_detach_scalers(new_crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(new_crtc_state); + + if (new_crtc_state->update_pipe) + intel_pipe_fastset(old_crtc_state, new_crtc_state); + } + + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.atomic_update_watermarks(state, + new_crtc_state); +} + static void intel_update_crtc(struct intel_crtc *crtc, struct intel_atomic_state *state, struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) { - struct drm_device *dev = state->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); bool modeset = needs_modeset(new_crtc_state); struct intel_plane_state *new_plane_state = intel_atomic_get_new_plane_state(state, to_intel_plane(crtc->base.primary)); if (modeset) { - update_scanline_offset(new_crtc_state); + intel_crtc_update_active_timings(new_crtc_state); + dev_priv->display.crtc_enable(new_crtc_state, state); /* vblanks work again, re-enable pipe CRC. */ @@ -13733,17 +14064,151 @@ static void intel_update_crtc(struct intel_crtc *crtc, else if (new_plane_state) intel_fbc_enable(crtc, new_crtc_state, new_plane_state); - intel_begin_crtc_commit(state, crtc); + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + + commit_pipe_config(state, old_crtc_state, new_crtc_state); if (INTEL_GEN(dev_priv) >= 9) skl_update_planes_on_crtc(state, crtc); else i9xx_update_planes_on_crtc(state, crtc); - intel_finish_crtc_commit(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. 
+ */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); } -static void intel_update_crtcs(struct intel_atomic_state *state) +static struct intel_crtc *intel_get_slave_crtc(const struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(new_crtc_state->base.crtc->dev); + enum transcoder slave_transcoder; + + WARN_ON(!is_power_of_2(new_crtc_state->sync_mode_slaves_mask)); + + slave_transcoder = ffs(new_crtc_state->sync_mode_slaves_mask) - 1; + return intel_get_crtc_for_pipe(dev_priv, + (enum pipe)slave_transcoder); +} + +static void intel_old_crtc_state_disables(struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state, + struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_disable_planes(state, crtc); + + /* + * We need to disable pipe CRC before disabling the pipe, + * or we race against vblank off. + */ + intel_crtc_disable_pipe_crc(crtc); + + dev_priv->display.crtc_disable(old_crtc_state, state); + crtc->active = false; + intel_fbc_disable(crtc); + intel_disable_shared_dpll(old_crtc_state); + + /* + * Underruns don't always raise interrupts, + * so check manually. + */ + intel_check_cpu_fifo_underruns(dev_priv); + intel_check_pch_fifo_underruns(dev_priv); + + /* FIXME unify this for all platforms */ + if (!new_crtc_state->base.active && + !HAS_GMCH(dev_priv) && + dev_priv->display.initial_watermarks) + dev_priv->display.initial_watermarks(state, + new_crtc_state); +} + +static void intel_trans_port_sync_modeset_disables(struct intel_atomic_state *state, + struct intel_crtc *crtc, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state); + struct intel_crtc_state *new_slave_crtc_state = + intel_atomic_get_new_crtc_state(state, slave_crtc); + struct intel_crtc_state *old_slave_crtc_state = + intel_atomic_get_old_crtc_state(state, slave_crtc); + + WARN_ON(!slave_crtc || !new_slave_crtc_state || + !old_slave_crtc_state); + + /* Disable Slave first */ + intel_pre_plane_update(old_slave_crtc_state, new_slave_crtc_state); + if (old_slave_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_slave_crtc_state, + new_slave_crtc_state, + slave_crtc); + + /* Disable Master */ + intel_pre_plane_update(old_crtc_state, new_crtc_state); + if (old_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_crtc_state, + new_crtc_state, + crtc); +} + +static void intel_commit_modeset_disables(struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state, *old_crtc_state; + struct intel_crtc *crtc; + int i; + + /* + * Disable CRTC/pipes in reverse order because some features(MST in + * TGL+) requires master and slave relationship between pipes, so it + * should always pick the lowest pipe as master as it will be enabled + * first and disable in the reverse order so the master will be the + * last one to be disabled. + */ + for_each_oldnew_intel_crtc_in_state_reverse(state, crtc, old_crtc_state, + new_crtc_state, i) { + if (!needs_modeset(new_crtc_state)) + continue; + + /* In case of Transcoder port Sync master slave CRTCs can be + * assigned in any order and we need to make sure that + * slave CRTCs are disabled first and then master CRTC since + * Slave vblanks are masked till Master Vblanks. 
+ */ + if (is_trans_port_sync_mode(new_crtc_state)) { + if (is_trans_port_sync_master(new_crtc_state)) + intel_trans_port_sync_modeset_disables(state, + crtc, + old_crtc_state, + new_crtc_state); + else + continue; + } else { + intel_pre_plane_update(old_crtc_state, new_crtc_state); + + if (old_crtc_state->base.active) + intel_old_crtc_state_disables(state, + old_crtc_state, + new_crtc_state, + crtc); + } + } +} + +static void intel_commit_modeset_enables(struct intel_atomic_state *state) { struct intel_crtc *crtc; struct intel_crtc_state *old_crtc_state, *new_crtc_state; @@ -13758,14 +14223,120 @@ static void intel_update_crtcs(struct intel_atomic_state *state) } } -static void skl_update_crtcs(struct intel_atomic_state *state) +static void intel_crtc_enable_trans_port_sync(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *new_crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + + intel_crtc_update_active_timings(new_crtc_state); + dev_priv->display.crtc_enable(new_crtc_state, state); + intel_crtc_enable_pipe_crc(crtc); +} + +static void intel_set_dp_tp_ctl_normal(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct drm_connector *conn; + struct intel_dp *intel_dp; + int i; + + for_each_new_connector_in_state(&state->base, conn, conn_state, i) { + if (conn_state->crtc == &crtc->base) + break; + } + intel_dp = enc_to_intel_dp(&intel_attached_encoder(conn)->base); + intel_dp_stop_link_train(intel_dp); +} + +static void intel_post_crtc_enable_updates(struct intel_crtc *crtc, + struct intel_atomic_state *state) +{ + struct intel_crtc_state *new_crtc_state = + intel_atomic_get_new_crtc_state(state, crtc); + struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(state, + to_intel_plane(crtc->base.primary)); + bool modeset = needs_modeset(new_crtc_state); + + if (new_crtc_state->update_pipe && !new_crtc_state->enable_fbc) + intel_fbc_disable(crtc); + else if (new_plane_state) + intel_fbc_enable(crtc, new_crtc_state, new_plane_state); + + /* Perform vblank evasion around commit operation */ + intel_pipe_update_start(new_crtc_state); + commit_pipe_config(state, old_crtc_state, new_crtc_state); + skl_update_planes_on_crtc(state, crtc); + intel_pipe_update_end(new_crtc_state); + + /* + * We usually enable FIFO underrun interrupts as part of the + * CRTC enable sequence during modesets. But when we inherit a + * valid pipe configuration from the BIOS we need to take care + * of enabling them on the CRTC's first fastset. 
+ */ + if (new_crtc_state->update_pipe && !modeset && + old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); +} + +static void intel_update_trans_port_sync_crtcs(struct intel_crtc *crtc, + struct intel_atomic_state *state, + struct intel_crtc_state *old_crtc_state, + struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *slave_crtc = intel_get_slave_crtc(new_crtc_state); + struct intel_crtc_state *new_slave_crtc_state = + intel_atomic_get_new_crtc_state(state, slave_crtc); + struct intel_crtc_state *old_slave_crtc_state = + intel_atomic_get_old_crtc_state(state, slave_crtc); + + WARN_ON(!slave_crtc || !new_slave_crtc_state || + !old_slave_crtc_state); + + DRM_DEBUG_KMS("Updating Transcoder Port Sync Master CRTC = %d %s and Slave CRTC %d %s\n", + crtc->base.base.id, crtc->base.name, slave_crtc->base.base.id, + slave_crtc->base.name); + + /* Enable seq for slave with with DP_TP_CTL left Idle until the + * master is ready + */ + intel_crtc_enable_trans_port_sync(slave_crtc, + state, + new_slave_crtc_state); + + /* Enable seq for master with with DP_TP_CTL left Idle */ + intel_crtc_enable_trans_port_sync(crtc, + state, + new_crtc_state); + + /* Set Slave's DP_TP_CTL to Normal */ + intel_set_dp_tp_ctl_normal(slave_crtc, + state); + + /* Set Master's DP_TP_CTL To Normal */ + usleep_range(200, 400); + intel_set_dp_tp_ctl_normal(crtc, + state); + + /* Now do the post crtc enable for all master and slaves */ + intel_post_crtc_enable_updates(slave_crtc, + state); + intel_post_crtc_enable_updates(crtc, + state); +} + +static void skl_commit_modeset_enables(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; struct intel_crtc_state *old_crtc_state, *new_crtc_state; unsigned int updated = 0; bool progress; - enum pipe pipe; int i; u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; u8 required_slices = state->wm_results.ddb.enabled_slices; @@ -13790,20 +14361,19 @@ static void skl_update_crtcs(struct intel_atomic_state *state) progress = false; for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + enum pipe pipe = crtc->pipe; bool vbl_wait = false; - unsigned int cmask = drm_crtc_mask(&crtc->base); - - pipe = crtc->pipe; + bool modeset = needs_modeset(new_crtc_state); - if (updated & cmask || !new_crtc_state->base.active) + if (updated & BIT(crtc->pipe) || !new_crtc_state->base.active) continue; if (skl_ddb_allocation_overlaps(&new_crtc_state->wm.skl.ddb, entries, - INTEL_INFO(dev_priv)->num_pipes, i)) + INTEL_NUM_PIPES(dev_priv), i)) continue; - updated |= cmask; + updated |= BIT(pipe); entries[i] = new_crtc_state->wm.skl.ddb; /* @@ -13814,12 +14384,22 @@ static void skl_update_crtcs(struct intel_atomic_state *state) */ if (!skl_ddb_entry_equal(&new_crtc_state->wm.skl.ddb, &old_crtc_state->wm.skl.ddb) && - !new_crtc_state->base.active_changed && + !modeset && state->wm_results.dirty_pipes != updated) vbl_wait = true; - intel_update_crtc(crtc, state, old_crtc_state, - new_crtc_state); + if (modeset && is_trans_port_sync_mode(new_crtc_state)) { + if (is_trans_port_sync_master(new_crtc_state)) + intel_update_trans_port_sync_crtcs(crtc, + state, + old_crtc_state, + new_crtc_state); + else + continue; + } else { + intel_update_crtc(crtc, state, old_crtc_state, + new_crtc_state); + } if (vbl_wait) intel_wait_for_vblank(dev_priv, pipe); @@ -13908,49 +14488,18 @@ static void intel_atomic_commit_tail(struct 
intel_atomic_state *state) if (state->modeset) wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, + new_crtc_state, i) { if (needs_modeset(new_crtc_state) || new_crtc_state->update_pipe) { put_domains[crtc->pipe] = modeset_get_crtc_power_domains(new_crtc_state); } - - if (!needs_modeset(new_crtc_state)) - continue; - - intel_pre_plane_update(old_crtc_state, new_crtc_state); - - if (old_crtc_state->base.active) { - intel_crtc_disable_planes(state, crtc); - - /* - * We need to disable pipe CRC before disabling the pipe, - * or we race against vblank off. - */ - intel_crtc_disable_pipe_crc(crtc); - - dev_priv->display.crtc_disable(old_crtc_state, state); - crtc->active = false; - intel_fbc_disable(crtc); - intel_disable_shared_dpll(old_crtc_state); - - /* - * Underruns don't always raise - * interrupts, so check manually. - */ - intel_check_cpu_fifo_underruns(dev_priv); - intel_check_pch_fifo_underruns(dev_priv); - - /* FIXME unify this for all platforms */ - if (!new_crtc_state->base.active && - !HAS_GMCH(dev_priv) && - dev_priv->display.initial_watermarks) - dev_priv->display.initial_watermarks(state, - new_crtc_state); - } } + intel_commit_modeset_disables(state); + /* FIXME: Eventually get rid of our crtc->config pointer */ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) crtc->config = new_crtc_state; @@ -13991,7 +14540,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_encoders_update_prepare(state); /* Now enable the clocks, plane, pipe, and connectors that we set up. */ - dev_priv->display.update_crtcs(state); + dev_priv->display.commit_modeset_enables(state); if (state->modeset) { intel_encoders_update_complete(state); @@ -14191,7 +14740,7 @@ static int intel_atomic_commit(struct drm_device *dev, sizeof(state->min_cdclk)); memcpy(dev_priv->min_voltage_level, state->min_voltage_level, sizeof(state->min_voltage_level)); - dev_priv->active_crtcs = state->active_crtcs; + dev_priv->active_pipes = state->active_pipes; dev_priv->cdclk.force_min_cdclk = state->cdclk.force_min_cdclk; intel_cdclk_swap_state(state); @@ -14204,7 +14753,7 @@ static int intel_atomic_commit(struct drm_device *dev, if (nonblock && state->modeset) { queue_work(dev_priv->modeset_wq, &state->base.commit_work); } else if (nonblock) { - queue_work(system_unbound_wq, &state->base.commit_work); + queue_work(dev_priv->flip_wq, &state->base.commit_work); } else { if (state->modeset) flush_workqueue(dev_priv->modeset_wq); @@ -14323,25 +14872,25 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for - * @new_state: the plane state being prepared + * @_new_plane_state: the plane state being prepared * * Prepares a framebuffer for usage on a display plane. Generally this * involves pinning the underlying object and updating the frontbuffer tracking * bits. Some older platforms need special physical address handling for * cursor planes. * - * Must be called with struct_mutex held. - * * Returns 0 on success, negative error code on failure. 
*/ int intel_prepare_plane_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) + struct drm_plane_state *_new_plane_state) { + struct intel_plane_state *new_plane_state = + to_intel_plane_state(_new_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(new_state->state); + to_intel_atomic_state(new_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); - struct drm_framebuffer *fb = new_state->fb; + struct drm_framebuffer *fb = new_plane_state->base.fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; @@ -14372,9 +14921,9 @@ intel_prepare_plane_fb(struct drm_plane *plane, } } - if (new_state->fence) { /* explicit fencing */ + if (new_plane_state->base.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready, - new_state->fence, + new_plane_state->base.fence, I915_FENCE_TIMEOUT, GFP_KERNEL); if (ret < 0) @@ -14388,15 +14937,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } - - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); + ret = intel_plane_pin_fb(new_plane_state); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; @@ -14404,7 +14946,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, fb_obj_bump_render_priority(obj); intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB); - if (!new_state->fence) { /* implicit fencing */ + if (!new_plane_state->base.fence) { /* implicit fencing */ struct dma_fence *fence; ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, @@ -14416,11 +14958,13 @@ intel_prepare_plane_fb(struct drm_plane *plane, fence = dma_resv_get_excl_rcu(obj->base.resv); if (fence) { - add_rps_boost_after_vblank(new_state->crtc, fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + fence); dma_fence_put(fence); } } else { - add_rps_boost_after_vblank(new_state->crtc, new_state->fence); + add_rps_boost_after_vblank(new_plane_state->base.crtc, + new_plane_state->base.fence); } /* @@ -14442,18 +14986,18 @@ intel_prepare_plane_fb(struct drm_plane *plane, /** * intel_cleanup_plane_fb - Cleans up an fb after plane use * @plane: drm plane to clean up for - * @old_state: the state from the previous modeset + * @_old_plane_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. - * - * Must be called with struct_mutex held. */ void intel_cleanup_plane_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) + struct drm_plane_state *_old_plane_state) { + struct intel_plane_state *old_plane_state = + to_intel_plane_state(_old_plane_state); struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state->state); + to_intel_atomic_state(old_plane_state->base.state); struct drm_i915_private *dev_priv = to_i915(plane->dev); if (intel_state->rps_interactive) { @@ -14462,18 +15006,16 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! 
*/ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_plane_unpin_fb(to_intel_plane_state(old_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_plane_unpin_fb(old_plane_state); } int skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format) + const struct drm_format_info *format) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int max_scale, mult; + int max_scale; int crtc_clock, max_dotclk, tmpclk1, tmpclk2; if (!crtc_state->base.enable) @@ -14494,80 +15036,17 @@ skl_max_scale(const struct intel_crtc_state *crtc_state, * or * cdclk/crtc_clock */ - mult = is_planar_yuv_format(pixel_format) ? 2 : 3; - tmpclk1 = (1 << 16) * mult - 1; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv) || + !drm_format_info_is_yuv_semiplanar(format)) + tmpclk1 = 0x30000 - 1; + else + tmpclk1 = 0x20000 - 1; tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); max_scale = min(tmpclk1, tmpclk2); return max_scale; } -static void intel_begin_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - bool modeset = needs_modeset(new_crtc_state); - - /* Perform vblank evasion around commit operation */ - intel_pipe_update_start(new_crtc_state); - - if (modeset) - goto out; - - if (new_crtc_state->base.color_mgmt_changed || - new_crtc_state->update_pipe) - intel_color_commit(new_crtc_state); - - if (new_crtc_state->update_pipe) - intel_update_pipe_config(old_crtc_state, new_crtc_state); - else if (INTEL_GEN(dev_priv) >= 9) - skl_detach_scalers(new_crtc_state); - - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - bdw_set_pipemisc(new_crtc_state); - -out: - if (dev_priv->display.atomic_update_watermarks) - dev_priv->display.atomic_update_watermarks(state, - new_crtc_state); -} - -void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, - struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - if (!IS_GEN(dev_priv, 2)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); - - if (crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } -} - -static void intel_finish_crtc_commit(struct intel_atomic_state *state, - struct intel_crtc *crtc) -{ - struct intel_crtc_state *old_crtc_state = - intel_atomic_get_old_crtc_state(state, crtc); - struct intel_crtc_state *new_crtc_state = - intel_atomic_get_new_crtc_state(state, crtc); - - intel_pipe_update_end(new_crtc_state); - - if (new_crtc_state->update_pipe && - !needs_modeset(new_crtc_state) && - old_crtc_state->base.mode.private_flags & I915_MODE_FLAG_INHERITED) - intel_crtc_arm_fifo_underrun(crtc, new_crtc_state); -} - /** * intel_plane_destroy - destroy a plane * @plane: plane to destroy @@ -14655,8 +15134,8 @@ static const struct drm_plane_funcs i8xx_plane_funcs = { }; static int -intel_legacy_cursor_update(struct drm_plane *plane, - struct drm_crtc *crtc, +intel_legacy_cursor_update(struct drm_plane *_plane, + struct drm_crtc *_crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, @@ -14664,11 +15143,13 @@ intel_legacy_cursor_update(struct drm_plane *plane, 
u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct drm_plane_state *old_plane_state, *new_plane_state; - struct intel_plane *intel_plane = to_intel_plane(plane); + struct intel_plane *plane = to_intel_plane(_plane); + struct intel_crtc *crtc = to_intel_crtc(_crtc); + struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + struct intel_plane_state *new_plane_state; struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->state); + to_intel_crtc_state(crtc->base.state); struct intel_crtc_state *new_crtc_state; int ret; @@ -14680,14 +15161,13 @@ intel_legacy_cursor_update(struct drm_plane *plane, crtc_state->update_pipe) goto slow; - old_plane_state = plane->state; /* * Don't do an async update if there is an outstanding commit modifying * the plane. This prevents our async update's changes from getting * overridden by a previous synchronous update's state. */ - if (old_plane_state->commit && - !try_wait_for_completion(&old_plane_state->commit->hw_done)) + if (old_plane_state->base.commit && + !try_wait_for_completion(&old_plane_state->base.commit->hw_done)) goto slow; /* @@ -14695,56 +15175,51 @@ intel_legacy_cursor_update(struct drm_plane *plane, * take the slowpath. Only changing fb or position should be * in the fastpath. */ - if (old_plane_state->crtc != crtc || - old_plane_state->src_w != src_w || - old_plane_state->src_h != src_h || - old_plane_state->crtc_w != crtc_w || - old_plane_state->crtc_h != crtc_h || - !old_plane_state->fb != !fb) + if (old_plane_state->base.crtc != &crtc->base || + old_plane_state->base.src_w != src_w || + old_plane_state->base.src_h != src_h || + old_plane_state->base.crtc_w != crtc_w || + old_plane_state->base.crtc_h != crtc_h || + !old_plane_state->base.fb != !fb) goto slow; - new_plane_state = intel_plane_duplicate_state(plane); + new_plane_state = to_intel_plane_state(intel_plane_duplicate_state(&plane->base)); if (!new_plane_state) return -ENOMEM; - new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(crtc)); + new_crtc_state = to_intel_crtc_state(intel_crtc_duplicate_state(&crtc->base)); if (!new_crtc_state) { ret = -ENOMEM; goto out_free; } - drm_atomic_set_fb_for_plane(new_plane_state, fb); + drm_atomic_set_fb_for_plane(&new_plane_state->base, fb); - new_plane_state->src_x = src_x; - new_plane_state->src_y = src_y; - new_plane_state->src_w = src_w; - new_plane_state->src_h = src_h; - new_plane_state->crtc_x = crtc_x; - new_plane_state->crtc_y = crtc_y; - new_plane_state->crtc_w = crtc_w; - new_plane_state->crtc_h = crtc_h; + new_plane_state->base.src_x = src_x; + new_plane_state->base.src_y = src_y; + new_plane_state->base.src_w = src_w; + new_plane_state->base.src_h = src_h; + new_plane_state->base.crtc_x = crtc_x; + new_plane_state->base.crtc_y = crtc_y; + new_plane_state->base.crtc_w = crtc_w; + new_plane_state->base.crtc_h = crtc_h; ret = intel_plane_atomic_check_with_state(crtc_state, new_crtc_state, - to_intel_plane_state(old_plane_state), - to_intel_plane_state(new_plane_state)); + old_plane_state, new_plane_state); if (ret) goto out_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); + ret = intel_plane_pin_fb(new_plane_state); if (ret) goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); - if (ret) - goto out_unlock; - - intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP); - intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb), - 
to_intel_frontbuffer(fb), - intel_plane->frontbuffer_bit); + intel_frontbuffer_flush(to_intel_frontbuffer(new_plane_state->base.fb), ORIGIN_FLIP); + intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->base.fb), + to_intel_frontbuffer(new_plane_state->base.fb), + plane->frontbuffer_bit); /* Swap plane state */ - plane->state = new_plane_state; + plane->base.state = &new_plane_state->base; /* * We cannot swap crtc_state as it may be in use by an atomic commit or @@ -14758,27 +15233,24 @@ intel_legacy_cursor_update(struct drm_plane *plane, */ crtc_state->active_planes = new_crtc_state->active_planes; - if (plane->state->visible) - intel_update_plane(intel_plane, crtc_state, - to_intel_plane_state(plane->state)); + if (new_plane_state->base.visible) + intel_update_plane(plane, crtc_state, new_plane_state); else - intel_disable_plane(intel_plane, crtc_state); + intel_disable_plane(plane, crtc_state); - intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); + intel_plane_unpin_fb(old_plane_state); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); out_free: if (new_crtc_state) - intel_crtc_destroy_state(crtc, &new_crtc_state->base); + intel_crtc_destroy_state(&crtc->base, &new_crtc_state->base); if (ret) - intel_plane_destroy_state(plane, new_plane_state); + intel_plane_destroy_state(&plane->base, &new_plane_state->base); else - intel_plane_destroy_state(plane, old_plane_state); + intel_plane_destroy_state(&plane->base, &old_plane_state->base); return ret; slow: - return drm_atomic_helper_update_plane(plane, crtc, fb, + return drm_atomic_helper_update_plane(&plane->base, &crtc->base, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h, ctx); } @@ -14819,7 +15291,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -14908,6 +15380,9 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_MODE_ROTATE_0, supported_rotations); + zpos = 0; + drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -14924,7 +15399,7 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, { unsigned int possible_crtcs; struct intel_plane *cursor; - int ret; + int ret, zpos; cursor = intel_plane_alloc(); if (IS_ERR(cursor)) @@ -14973,6 +15448,9 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180); + zpos = RUNTIME_INFO(dev_priv)->num_sprites[pipe] + 1; + drm_plane_create_zpos_immutable_property(&cursor->base, zpos); + drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); return cursor; @@ -15048,12 +15526,12 @@ static const struct drm_crtc_funcs i965_crtc_funcs = { .disable_vblank = i965_disable_vblank, }; -static const struct drm_crtc_funcs i945gm_crtc_funcs = { +static const struct drm_crtc_funcs i915gm_crtc_funcs = { INTEL_CRTC_FUNCS, .get_vblank_counter = i915_get_vblank_counter, - .enable_vblank = i945gm_enable_vblank, - .disable_vblank = i945gm_disable_vblank, + .enable_vblank = i915gm_enable_vblank, + .disable_vblank = i915gm_disable_vblank, }; static const struct drm_crtc_funcs i915_crtc_funcs = { @@ -15124,8 +15602,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) funcs = &g4x_crtc_funcs; else if (IS_GEN(dev_priv, 4)) funcs = &i965_crtc_funcs; - else if 
(IS_I945GM(dev_priv)) - funcs = &i945gm_crtc_funcs; + else if (IS_I945GM(dev_priv) || IS_I915GM(dev_priv)) + funcs = &i915gm_crtc_funcs; else if (IS_GEN(dev_priv, 3)) funcs = &i915_crtc_funcs; else @@ -15160,8 +15638,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc; } - drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - intel_color_init(intel_crtc); WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe); @@ -15196,21 +15672,32 @@ int intel_get_pipe_from_crtc_id_ioctl(struct drm_device *dev, void *data, return 0; } -static int intel_encoder_clones(struct intel_encoder *encoder) +static u32 intel_encoder_possible_clones(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; struct intel_encoder *source_encoder; - int index_mask = 0; - int entry = 0; + u32 possible_clones = 0; for_each_intel_encoder(dev, source_encoder) { if (encoders_cloneable(encoder, source_encoder)) - index_mask |= (1 << entry); + possible_clones |= drm_encoder_mask(&source_encoder->base); + } - entry++; + return possible_clones; +} + +static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder) +{ + struct drm_device *dev = encoder->base.dev; + struct intel_crtc *crtc; + u32 possible_crtcs = 0; + + for_each_intel_crtc(dev, crtc) { + if (encoder->crtc_mask & BIT(crtc->pipe)) + possible_crtcs |= drm_crtc_mask(&crtc->base); } - return index_mask; + return possible_crtcs; } static bool ilk_has_edp_a(struct drm_i915_private *dev_priv) @@ -15292,13 +15779,18 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_pps_init(dev_priv); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return; if (INTEL_GEN(dev_priv) >= 12) { - /* TODO: initialize TC ports as well */ intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); + intel_ddi_init(dev_priv, PORT_F); + intel_ddi_init(dev_priv, PORT_G); + intel_ddi_init(dev_priv, PORT_H); + intel_ddi_init(dev_priv, PORT_I); icl_dsi_init(dev_priv); } else if (IS_ELKHARTLAKE(dev_priv)) { intel_ddi_init(dev_priv, PORT_A); @@ -15508,9 +16000,10 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_psr_init(dev_priv); for_each_intel_encoder(&dev_priv->drm, encoder) { - encoder->base.possible_crtcs = encoder->crtc_mask; + encoder->base.possible_crtcs = + intel_encoder_possible_crtcs(encoder); encoder->base.possible_clones = - intel_encoder_clones(encoder); + intel_encoder_possible_clones(encoder); } intel_init_pch_refclk(dev_priv); @@ -15765,8 +16258,14 @@ intel_mode_valid(struct drm_device *dev, DRM_MODE_FLAG_CLKDIV2)) return MODE_BAD; - if (INTEL_GEN(dev_priv) >= 9 || - IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { + /* Transcoder timing limits */ + if (INTEL_GEN(dev_priv) >= 11) { + hdisplay_max = 16384; + vdisplay_max = 8192; + htotal_max = 16384; + vtotal_max = 8192; + } else if (INTEL_GEN(dev_priv) >= 9 || + IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv)) { hdisplay_max = 8192; /* FDI max 4096 handled elsewhere */ vdisplay_max = 4096; htotal_max = 8192; @@ -15798,6 +16297,41 @@ intel_mode_valid(struct drm_device *dev, return MODE_OK; } +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode) +{ + int plane_width_max, plane_height_max; + + /* + * intel_mode_valid() should be + * sufficient on older platforms. 
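A hedged usage sketch (not shown in this hunk): the new intel_mode_valid_max_plane_size() helper, declared later in intel_display.h, would typically be called from a connector's .mode_valid hook on top of the transcoder checks in intel_mode_valid(), so that modes no single plane can cover full-screen are filtered out of the mode list. The caller name and body below are illustrative assumptions; only the helper and its limits come from this diff.

static enum drm_mode_status
example_connector_mode_valid(struct drm_connector *connector,
			     struct drm_display_mode *mode)
{
	struct drm_i915_private *dev_priv = to_i915(connector->dev);

	/* connector-specific checks (link rate, max dotclock, ...) go first */

	/* then drop anything wider than 5120 or taller than 4320/4096 */
	return intel_mode_valid_max_plane_size(dev_priv, mode);
}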
+ */ + if (INTEL_GEN(dev_priv) < 9) + return MODE_OK; + + /* + * Most people will probably want a fullscreen + * plane so let's not advertize modes that are + * too big for that. + */ + if (INTEL_GEN(dev_priv) >= 11) { + plane_width_max = 5120; + plane_height_max = 4320; + } else { + plane_width_max = 5120; + plane_height_max = 4096; + } + + if (mode->hdisplay > plane_width_max) + return MODE_H_ILLEGAL; + + if (mode->vdisplay > plane_height_max) + return MODE_V_ILLEGAL; + + return MODE_OK; +} + static const struct drm_mode_config_funcs intel_mode_funcs = { .fb_create = intel_user_framebuffer_create, .get_format_info = intel_get_format_info, @@ -15898,47 +16432,17 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } if (INTEL_GEN(dev_priv) >= 9) - dev_priv->display.update_crtcs = skl_update_crtcs; - else - dev_priv->display.update_crtcs = intel_update_crtcs; -} - -static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return VLV_VGACNTRL; - else if (INTEL_GEN(dev_priv) >= 5) - return CPU_VGACNTRL; + dev_priv->display.commit_modeset_enables = skl_commit_modeset_enables; else - return VGACNTRL; -} - -/* Disable the VGA plane that we never use */ -static void i915_disable_vga(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u8 sr1; - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(SR01, VGA_SR_INDEX); - sr1 = inb(VGA_SR_DATA); - outb(sr1 | 1<<5, VGA_SR_DATA); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - udelay(300); + dev_priv->display.commit_modeset_enables = intel_commit_modeset_enables; - I915_WRITE(vga_reg, VGA_DISP_DISABLE); - POSTING_READ(vga_reg); } -void intel_modeset_init_hw(struct drm_device *dev) +void intel_modeset_init_hw(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); - dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; + intel_update_cdclk(i915); + intel_dump_cdclk_state(&i915->cdclk.hw, "Current CDCLK"); + i915->cdclk.logical = i915->cdclk.actual = i915->cdclk.hw; } /* @@ -16098,114 +16602,111 @@ out: return ret; } -int intel_modeset_init(struct drm_device *dev) +static void intel_mode_config_init(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe; - struct intel_crtc *crtc; - int ret; - - dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); - - drm_mode_config_init(dev); + struct drm_mode_config *mode_config = &i915->drm.mode_config; - ret = intel_bw_init(dev_priv); - if (ret) - return ret; + drm_mode_config_init(&i915->drm); - dev->mode_config.min_width = 0; - dev->mode_config.min_height = 0; + mode_config->min_width = 0; + mode_config->min_height = 0; - dev->mode_config.preferred_depth = 24; - dev->mode_config.prefer_shadow = 1; + mode_config->preferred_depth = 24; + mode_config->prefer_shadow = 1; - dev->mode_config.allow_fb_modifiers = true; + mode_config->allow_fb_modifiers = true; - dev->mode_config.funcs = &intel_mode_funcs; - - init_llist_head(&dev_priv->atomic_helper.free_list); - INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state_worker); - - intel_init_quirks(dev_priv); - - intel_fbc_init(dev_priv); - - intel_init_pm(dev_priv); - - /* - * There may be no VBT; and if the 
BIOS enabled SSC we can - * just keep using it to avoid unnecessary flicker. Whereas if the - * BIOS isn't using it, don't assume it will work even if the VBT - * indicates as much. - */ - if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv)) { - bool bios_lvds_use_ssc = !!(I915_READ(PCH_DREF_CONTROL) & - DREF_SSC1_ENABLE); - - if (dev_priv->vbt.lvds_use_ssc != bios_lvds_use_ssc) { - DRM_DEBUG_KMS("SSC %sabled by BIOS, overriding VBT which says %sabled\n", - bios_lvds_use_ssc ? "en" : "dis", - dev_priv->vbt.lvds_use_ssc ? "en" : "dis"); - dev_priv->vbt.lvds_use_ssc = bios_lvds_use_ssc; - } - } + mode_config->funcs = &intel_mode_funcs; /* * Maximum framebuffer dimensions, chosen to match * the maximum render engine surface size on gen4+. */ - if (INTEL_GEN(dev_priv) >= 7) { - dev->mode_config.max_width = 16384; - dev->mode_config.max_height = 16384; - } else if (INTEL_GEN(dev_priv) >= 4) { - dev->mode_config.max_width = 8192; - dev->mode_config.max_height = 8192; - } else if (IS_GEN(dev_priv, 3)) { - dev->mode_config.max_width = 4096; - dev->mode_config.max_height = 4096; + if (INTEL_GEN(i915) >= 7) { + mode_config->max_width = 16384; + mode_config->max_height = 16384; + } else if (INTEL_GEN(i915) >= 4) { + mode_config->max_width = 8192; + mode_config->max_height = 8192; + } else if (IS_GEN(i915, 3)) { + mode_config->max_width = 4096; + mode_config->max_height = 4096; } else { - dev->mode_config.max_width = 2048; - dev->mode_config.max_height = 2048; + mode_config->max_width = 2048; + mode_config->max_height = 2048; } - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { - dev->mode_config.cursor_width = IS_I845G(dev_priv) ? 64 : 512; - dev->mode_config.cursor_height = 1023; - } else if (IS_GEN(dev_priv, 2)) { - dev->mode_config.cursor_width = 64; - dev->mode_config.cursor_height = 64; + if (IS_I845G(i915) || IS_I865G(i915)) { + mode_config->cursor_width = IS_I845G(i915) ? 64 : 512; + mode_config->cursor_height = 1023; + } else if (IS_GEN(i915, 2)) { + mode_config->cursor_width = 64; + mode_config->cursor_height = 64; } else { - dev->mode_config.cursor_width = 256; - dev->mode_config.cursor_height = 256; + mode_config->cursor_width = 256; + mode_config->cursor_height = 256; } +} - DRM_DEBUG_KMS("%d display pipe%s available.\n", - INTEL_INFO(dev_priv)->num_pipes, - INTEL_INFO(dev_priv)->num_pipes > 1 ? "s" : ""); +int intel_modeset_init(struct drm_i915_private *i915) +{ + struct drm_device *dev = &i915->drm; + enum pipe pipe; + struct intel_crtc *crtc; + int ret; - for_each_pipe(dev_priv, pipe) { - ret = intel_crtc_init(dev_priv, pipe); - if (ret) { - drm_mode_config_cleanup(dev); - return ret; + i915->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + i915->flip_wq = alloc_workqueue("i915_flip", WQ_HIGHPRI | + WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); + + intel_mode_config_init(i915); + + ret = intel_bw_init(i915); + if (ret) + return ret; + + init_llist_head(&i915->atomic_helper.free_list); + INIT_WORK(&i915->atomic_helper.free_work, + intel_atomic_helper_free_state_worker); + + intel_init_quirks(i915); + + intel_fbc_init(i915); + + intel_init_pm(i915); + + intel_panel_sanitize_ssc(i915); + + intel_gmbus_setup(i915); + + DRM_DEBUG_KMS("%d display pipe%s available.\n", + INTEL_NUM_PIPES(i915), + INTEL_NUM_PIPES(i915) > 1 ? 
"s" : ""); + + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + for_each_pipe(i915, pipe) { + ret = intel_crtc_init(i915, pipe); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } } } intel_shared_dpll_init(dev); - intel_update_fdi_pll_freq(dev_priv); + intel_update_fdi_pll_freq(i915); - intel_update_czclk(dev_priv); - intel_modeset_init_hw(dev); + intel_update_czclk(i915); + intel_modeset_init_hw(i915); - intel_hdcp_component_init(dev_priv); + intel_hdcp_component_init(i915); - if (dev_priv->max_cdclk_freq == 0) - intel_update_max_cdclk(dev_priv); + if (i915->max_cdclk_freq == 0) + intel_update_max_cdclk(i915); /* Just disable it once at startup */ - i915_disable_vga(dev_priv); - intel_setup_outputs(dev_priv); + intel_vga_disable(i915); + intel_setup_outputs(i915); drm_modeset_lock_all(dev); intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx); @@ -16224,8 +16725,7 @@ int intel_modeset_init(struct drm_device *dev) * can even allow for smooth boot transitions if the BIOS * fb is large enough for the active pipe configuration. */ - dev_priv->display.get_initial_plane_config(crtc, - &plane_config); + i915->display.get_initial_plane_config(crtc, &plane_config); /* * If the fb is shared between multiple heads, we'll @@ -16239,7 +16739,7 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - if (!HAS_GMCH(dev_priv)) + if (!HAS_GMCH(i915)) sanitize_watermarks(dev); /* @@ -16564,39 +17064,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) icl_sanitize_encoder_pll_mapping(encoder); } -void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv) -{ - i915_reg_t vga_reg = i915_vgacntrl_reg(dev_priv); - - if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { - DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); - i915_disable_vga(dev_priv); - } -} - -void i915_redisable_vga(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - /* - * This function can be called both from intel_modeset_setup_hw_state or - * at a very early point in our resume sequence, where the power well - * structures are not yet restored. Since this function is at a very - * paranoid "someone might have enabled VGA while we were not looking" - * level, just check if the power well is enabled instead of trying to - * follow the "don't touch the power well if we don't need it" policy - * the rest of the driver uses. 
- */ - wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_VGA); - if (!wakeref) - return; - - i915_redisable_vga_power_on(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_VGA, wakeref); -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct drm_i915_private *dev_priv) { @@ -16640,7 +17107,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) struct drm_connector_list_iter conn_iter; int i; - dev_priv->active_crtcs = 0; + dev_priv->active_pipes = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = @@ -16657,7 +17124,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->active = crtc_state->base.active; if (crtc_state->base.active) - dev_priv->active_crtcs |= 1 << crtc->pipe; + dev_priv->active_pipes |= BIT(crtc->pipe); DRM_DEBUG_KMS("[CRTC:%d:%s] hw state readout: %s\n", crtc->base.base.id, crtc->base.name, @@ -16717,24 +17184,28 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) drm_connector_list_iter_begin(dev, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->get_hw_state(connector)) { + struct intel_crtc_state *crtc_state; + struct intel_crtc *crtc; + connector->base.dpms = DRM_MODE_DPMS_ON; encoder = connector->encoder; connector->base.encoder = &encoder->base; - if (encoder->base.crtc && - encoder->base.crtc->state->active) { + crtc = to_intel_crtc(encoder->base.crtc); + crtc_state = crtc ? to_intel_crtc_state(crtc->base.state) : NULL; + + if (crtc_state && crtc_state->base.active) { /* * This has to be done during hardware readout * because anything calling .crtc_disable may * rely on the connector_mask being accurate. */ - encoder->base.crtc->state->connector_mask |= + crtc_state->base.connector_mask |= drm_connector_mask(&connector->base); - encoder->base.crtc->state->encoder_mask |= + crtc_state->base.encoder_mask |= drm_encoder_mask(&encoder->base); } - } else { connector->base.dpms = DRM_MODE_DPMS_OFF; connector->base.encoder = NULL; @@ -16759,7 +17230,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->base.mode.hdisplay = crtc_state->pipe_src_w; crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); - WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); + WARN_ON(drm_atomic_set_mode_for_crtc(&crtc_state->base, &crtc->base.mode)); /* * The initial mode needs to be set in order to keep @@ -16774,15 +17245,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_crtc_compute_pixel_rate(crtc_state); - if (dev_priv->display.modeset_calc_cdclk) { - min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); - if (WARN_ON(min_cdclk < 0)) - min_cdclk = 0; - } + min_cdclk = intel_crtc_compute_min_cdclk(crtc_state); + if (WARN_ON(min_cdclk < 0)) + min_cdclk = 0; - drm_calc_timestamping_constants(&crtc->base, - &crtc_state->base.adjusted_mode); - update_scanline_offset(crtc_state); + intel_crtc_update_active_timings(crtc_state); } dev_priv->min_cdclk[crtc->pipe] = min_cdclk; @@ -17042,13 +17509,13 @@ void intel_display_resume(struct drm_device *dev) drm_atomic_state_put(state); } -static void intel_hpd_poll_fini(struct drm_device *dev) +static void intel_hpd_poll_fini(struct drm_i915_private *i915) { struct intel_connector *connector; struct drm_connector_list_iter conn_iter; /* Kill all the work that may have been queued by hpd. 
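The hardware state readout above now tracks active pipes as a u8 bitmask keyed by pipe (dev_priv->active_pipes |= BIT(crtc->pipe)) instead of the old active_crtcs field. A stand-alone illustration of that bookkeeping, assuming the usual PIPE_A = 0 numbering (not kernel code):

#include <stdint.h>
#include <stdio.h>

enum pipe { PIPE_A, PIPE_B, PIPE_C };

int main(void)
{
	uint8_t active_pipes = 0;

	/* hw readout found pipes A and C running */
	active_pipes |= 1u << PIPE_A;
	active_pipes |= 1u << PIPE_C;

	printf("active_pipes = 0x%02x, count = %d\n",
	       active_pipes, __builtin_popcount(active_pipes));
	/* prints: active_pipes = 0x05, count = 2 */
	return 0;
}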
*/ - drm_connector_list_iter_begin(dev, &conn_iter); + drm_connector_list_iter_begin(&i915->drm, &conn_iter); for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) cancel_work_sync(&connector->modeset_retry_work); @@ -17060,78 +17527,49 @@ static void intel_hpd_poll_fini(struct drm_device *dev) drm_connector_list_iter_end(&conn_iter); } -void intel_modeset_driver_remove(struct drm_device *dev) +void intel_modeset_driver_remove(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - flush_workqueue(dev_priv->modeset_wq); + flush_workqueue(i915->flip_wq); + flush_workqueue(i915->modeset_wq); - flush_work(&dev_priv->atomic_helper.free_work); - WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + flush_work(&i915->atomic_helper.free_work); + WARN_ON(!llist_empty(&i915->atomic_helper.free_list)); /* * Interrupts and polling as the first thing to avoid creating havoc. * Too much stuff here (turning of connectors, ...) would * experience fancy races otherwise. */ - intel_irq_uninstall(dev_priv); + intel_irq_uninstall(i915); /* * Due to the hpd irq storm handling the hotplug work can re-arm the * poll handlers. Hence disable polling after hpd handling is shut down. */ - intel_hpd_poll_fini(dev); + intel_hpd_poll_fini(i915); /* poll work can call into fbdev, hence clean that up afterwards */ - intel_fbdev_fini(dev_priv); + intel_fbdev_fini(i915); intel_unregister_dsm_handler(); - intel_fbc_global_disable(dev_priv); + intel_fbc_global_disable(i915); /* flush any delayed tasks or pending work */ flush_scheduled_work(); - intel_hdcp_component_fini(dev_priv); - - drm_mode_config_cleanup(dev); - - intel_overlay_cleanup(dev_priv); - - intel_gmbus_teardown(dev_priv); - - destroy_workqueue(dev_priv->modeset_wq); + intel_hdcp_component_fini(i915); - intel_fbc_cleanup_cfb(dev_priv); -} - -/* - * set vga decode state - true == enable VGA decode - */ -int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state) -{ - unsigned reg = INTEL_GEN(dev_priv) >= 6 ? 
SNB_GMCH_CTRL : INTEL_GMCH_CTRL; - u16 gmch_ctrl; - - if (pci_read_config_word(dev_priv->bridge_dev, reg, &gmch_ctrl)) { - DRM_ERROR("failed to read control word\n"); - return -EIO; - } + drm_mode_config_cleanup(&i915->drm); - if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !state) - return 0; + intel_overlay_cleanup(i915); - if (state) - gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; - else - gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; + intel_gmbus_teardown(i915); - if (pci_write_config_word(dev_priv->bridge_dev, reg, gmch_ctrl)) { - DRM_ERROR("failed to write control word\n"); - return -EIO; - } + destroy_workqueue(i915->flip_wq); + destroy_workqueue(i915->modeset_wq); - return 0; + intel_fbc_cleanup_cfb(i915); } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) @@ -17194,7 +17632,7 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) BUILD_BUG_ON(ARRAY_SIZE(transcoders) != ARRAY_SIZE(error->transcoder)); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return NULL; error = kzalloc(sizeof(*error), GFP_ATOMIC); @@ -17273,7 +17711,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, if (!error) return; - err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev_priv)->num_pipes); + err_printf(m, "Num Pipes: %d\n", INTEL_NUM_PIPES(dev_priv)); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) err_printf(m, "PWR_WELL_CTL2: %08x\n", error->power_well_driver); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index e57e6969051d..7dcb176d91b0 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -1,5 +1,5 @@ /* - * Copyright © 2006-2017 Intel Corporation + * Copyright © 2006-2019 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,13 +27,16 @@ #include <drm/drm_util.h> #include <drm/i915_drm.h> +#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; struct drm_connector; struct drm_device; +struct drm_display_mode; struct drm_encoder; struct drm_file; +struct drm_format_info; struct drm_framebuffer; struct drm_i915_error_state_buf; struct drm_i915_gem_object; @@ -52,6 +55,7 @@ struct intel_plane; struct intel_plane_state; struct intel_remapped_info; struct intel_rotation_info; +struct intel_crtc_state; enum i915_gpio { GPIOA, @@ -91,6 +95,7 @@ enum pipe { #define pipe_name(p) ((p) + 'A') enum transcoder { + INVALID_TRANSCODER = -1, /* * The following transcoders have a 1:1 transcoder -> pipe mapping, * keep their values fixed: the code assumes that TRANSCODER_A=0, the @@ -182,6 +187,24 @@ enum plane_id { for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ for_each_if((__crtc)->plane_ids_mask & BIT(__p)) +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + PORT_F, + PORT_G, + PORT_H, + PORT_I, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + /* * Ports identifier referenced from other drivers. 
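The port enum just above now lives in intel_display.h and gains PORT_G..PORT_I for the extra Tiger Lake DDIs; port_name() maps a port to its letter by a plain offset from 'A'. A stand-alone check of that mapping (illustrative only):

#include <stdio.h>

enum port {
	PORT_NONE = -1,
	PORT_A = 0, PORT_B, PORT_C, PORT_D, PORT_E,
	PORT_F, PORT_G, PORT_H, PORT_I,
	I915_MAX_PORTS
};

#define port_name(p) ((p) + 'A')

int main(void)
{
	/* PORT_I has index 8, so this prints "port I" */
	printf("port %c\n", port_name(PORT_I));
	return 0;
}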
* Expected to remain stable over time @@ -251,6 +274,7 @@ enum aux_ch { AUX_CH_D, AUX_CH_E, /* ICL+ */ AUX_CH_F, + AUX_CH_G, }; #define aux_ch_name(a) ((a) + 'A') @@ -289,10 +313,10 @@ enum phy_fia { }; #define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) #define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for ((__p) = 0; (__p) < INTEL_NUM_PIPES(__dev_priv); (__p)++) \ for_each_if((__mask) & BIT(__p)) #define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ @@ -330,7 +354,7 @@ enum phy_fia { &(dev)->mode_config.plane_list, \ base.head) \ for_each_if((plane_mask) & \ - drm_plane_mask(&intel_plane->base))) + drm_plane_mask(&intel_plane->base)) #define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ list_for_each_entry(intel_plane, \ @@ -411,16 +435,37 @@ enum phy_fia { (__i)++) \ for_each_if(crtc) +#define for_each_oldnew_intel_crtc_in_state_reverse(__state, crtc, old_crtc_state, new_crtc_state, __i) \ + for ((__i) = (__state)->base.dev->mode_config.num_crtc - 1; \ + (__i) >= 0 && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (old_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].old_state), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)--) \ + for_each_if(crtc) + +#define intel_atomic_crtc_state_for_each_plane_state( \ + plane, plane_state, \ + crtc_state) \ + for_each_intel_plane_mask(((crtc_state)->base.state->dev), (plane), \ + ((crtc_state)->base.plane_mask)) \ + for_each_if ((plane_state = \ + to_intel_plane_state(__drm_atomic_get_current_plane_state((crtc_state)->base.state, &plane->base)))) + void intel_link_compute_m_n(u16 bpp, int nlanes, int pixel_clock, int link_clock, struct intel_link_m_n *m_n, - bool constant_n); + bool constant_n, bool fec_enable); bool is_ccs_modifier(u64 modifier); void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, u32 pixel_format, u64 modifier); bool intel_plane_can_remap(const struct intel_plane_state *plane_state); +enum drm_mode_status +intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv, + const struct drm_display_mode *mode); enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port); +bool is_trans_port_sync_mode(const struct intel_crtc_state *state); void intel_plane_destroy(struct drm_plane *plane); void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); @@ -499,8 +544,6 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, enum link_m_n_set m_n); -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, struct dpll *best_clock); @@ -521,7 +564,7 @@ void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(const struct intel_crtc_state *crtc_state, - u32 pixel_format); + const struct drm_format_info *format); u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct 
intel_plane_state *plane_state); u32 glk_plane_color_ctl_crtc(const struct intel_crtc_state *crtc_state); @@ -544,13 +587,10 @@ void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct intel_display_error_state *error); /* modesetting */ -void intel_modeset_init_hw(struct drm_device *dev); -int intel_modeset_init(struct drm_device *dev); -void intel_modeset_driver_remove(struct drm_device *dev); -int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state); +void intel_modeset_init_hw(struct drm_i915_private *i915); +int intel_modeset_init(struct drm_i915_private *i915); +void intel_modeset_driver_remove(struct drm_i915_private *i915); void intel_display_resume(struct drm_device *dev); -void i915_redisable_vga(struct drm_i915_private *dev_priv); -void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); void intel_init_pch_refclk(struct drm_i915_private *dev_priv); /* modesetting asserts */ diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 12099760d99e..6f9e7927e248 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -3,8 +3,6 @@ * Copyright © 2019 Intel Corporation */ -#include <linux/vgaarb.h> - #include "display/intel_crt.h" #include "display/intel_dp.h" @@ -19,16 +17,14 @@ #include "intel_hotplug.h" #include "intel_sideband.h" #include "intel_tc.h" +#include "intel_vga.h" bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, enum i915_power_well_id power_well_id); const char * -intel_display_power_domain_str(struct drm_i915_private *i915, - enum intel_display_power_domain domain) +intel_display_power_domain_str(enum intel_display_power_domain domain) { - bool ddi_tc_ports = IS_GEN(i915, 12); - switch (domain) { case POWER_DOMAIN_DISPLAY_CORE: return "DISPLAY_CORE"; @@ -71,23 +67,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_PORT_DDI_C_LANES: return "PORT_DDI_C_LANES"; case POWER_DOMAIN_PORT_DDI_D_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_LANES != - POWER_DOMAIN_PORT_DDI_TC1_LANES); - return ddi_tc_ports ? "PORT_DDI_TC1_LANES" : "PORT_DDI_D_LANES"; + return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_LANES != - POWER_DOMAIN_PORT_DDI_TC2_LANES); - return ddi_tc_ports ? "PORT_DDI_TC2_LANES" : "PORT_DDI_E_LANES"; + return "PORT_DDI_E_LANES"; case POWER_DOMAIN_PORT_DDI_F_LANES: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_LANES != - POWER_DOMAIN_PORT_DDI_TC3_LANES); - return ddi_tc_ports ? "PORT_DDI_TC3_LANES" : "PORT_DDI_F_LANES"; - case POWER_DOMAIN_PORT_DDI_TC4_LANES: - return "PORT_DDI_TC4_LANES"; - case POWER_DOMAIN_PORT_DDI_TC5_LANES: - return "PORT_DDI_TC5_LANES"; - case POWER_DOMAIN_PORT_DDI_TC6_LANES: - return "PORT_DDI_TC6_LANES"; + return "PORT_DDI_F_LANES"; + case POWER_DOMAIN_PORT_DDI_G_LANES: + return "PORT_DDI_G_LANES"; + case POWER_DOMAIN_PORT_DDI_H_LANES: + return "PORT_DDI_H_LANES"; + case POWER_DOMAIN_PORT_DDI_I_LANES: + return "PORT_DDI_I_LANES"; case POWER_DOMAIN_PORT_DDI_A_IO: return "PORT_DDI_A_IO"; case POWER_DOMAIN_PORT_DDI_B_IO: @@ -95,23 +85,17 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_PORT_DDI_C_IO: return "PORT_DDI_C_IO"; case POWER_DOMAIN_PORT_DDI_D_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_D_IO != - POWER_DOMAIN_PORT_DDI_TC1_IO); - return ddi_tc_ports ? 
"PORT_DDI_TC1_IO" : "PORT_DDI_D_IO"; + return "PORT_DDI_D_IO"; case POWER_DOMAIN_PORT_DDI_E_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_E_IO != - POWER_DOMAIN_PORT_DDI_TC2_IO); - return ddi_tc_ports ? "PORT_DDI_TC2_IO" : "PORT_DDI_E_IO"; + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DDI_F_IO: - BUILD_BUG_ON(POWER_DOMAIN_PORT_DDI_F_IO != - POWER_DOMAIN_PORT_DDI_TC3_IO); - return ddi_tc_ports ? "PORT_DDI_TC3_IO" : "PORT_DDI_F_IO"; - case POWER_DOMAIN_PORT_DDI_TC4_IO: - return "PORT_DDI_TC4_IO"; - case POWER_DOMAIN_PORT_DDI_TC5_IO: - return "PORT_DDI_TC5_IO"; - case POWER_DOMAIN_PORT_DDI_TC6_IO: - return "PORT_DDI_TC6_IO"; + return "PORT_DDI_F_IO"; + case POWER_DOMAIN_PORT_DDI_G_IO: + return "PORT_DDI_G_IO"; + case POWER_DOMAIN_PORT_DDI_H_IO: + return "PORT_DDI_H_IO"; + case POWER_DOMAIN_PORT_DDI_I_IO: + return "PORT_DDI_I_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -129,34 +113,33 @@ intel_display_power_domain_str(struct drm_i915_private *i915, case POWER_DOMAIN_AUX_C: return "AUX_C"; case POWER_DOMAIN_AUX_D: - BUILD_BUG_ON(POWER_DOMAIN_AUX_D != POWER_DOMAIN_AUX_TC1); - return ddi_tc_ports ? "AUX_TC1" : "AUX_D"; + return "AUX_D"; case POWER_DOMAIN_AUX_E: - BUILD_BUG_ON(POWER_DOMAIN_AUX_E != POWER_DOMAIN_AUX_TC2); - return ddi_tc_ports ? "AUX_TC2" : "AUX_E"; + return "AUX_E"; case POWER_DOMAIN_AUX_F: - BUILD_BUG_ON(POWER_DOMAIN_AUX_F != POWER_DOMAIN_AUX_TC3); - return ddi_tc_ports ? "AUX_TC3" : "AUX_F"; - case POWER_DOMAIN_AUX_TC4: - return "AUX_TC4"; - case POWER_DOMAIN_AUX_TC5: - return "AUX_TC5"; - case POWER_DOMAIN_AUX_TC6: - return "AUX_TC6"; + return "AUX_F"; + case POWER_DOMAIN_AUX_G: + return "AUX_G"; + case POWER_DOMAIN_AUX_H: + return "AUX_H"; + case POWER_DOMAIN_AUX_I: + return "AUX_I"; case POWER_DOMAIN_AUX_IO_A: return "AUX_IO_A"; - case POWER_DOMAIN_AUX_TBT1: - return "AUX_TBT1"; - case POWER_DOMAIN_AUX_TBT2: - return "AUX_TBT2"; - case POWER_DOMAIN_AUX_TBT3: - return "AUX_TBT3"; - case POWER_DOMAIN_AUX_TBT4: - return "AUX_TBT4"; - case POWER_DOMAIN_AUX_TBT5: - return "AUX_TBT5"; - case POWER_DOMAIN_AUX_TBT6: - return "AUX_TBT6"; + case POWER_DOMAIN_AUX_C_TBT: + return "AUX_C_TBT"; + case POWER_DOMAIN_AUX_D_TBT: + return "AUX_D_TBT"; + case POWER_DOMAIN_AUX_E_TBT: + return "AUX_E_TBT"; + case POWER_DOMAIN_AUX_F_TBT: + return "AUX_F_TBT"; + case POWER_DOMAIN_AUX_G_TBT: + return "AUX_G_TBT"; + case POWER_DOMAIN_AUX_H_TBT: + return "AUX_H_TBT"; + case POWER_DOMAIN_AUX_I_TBT: + return "AUX_I_TBT"; case POWER_DOMAIN_GMBUS: return "GMBUS"; case POWER_DOMAIN_INIT: @@ -283,23 +266,8 @@ bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv, u8 irq_pipe_mask, bool has_vga) { - struct pci_dev *pdev = dev_priv->drm.pdev; - - /* - * After we re-enable the power well, if we touch VGA register 0x3d5 - * we'll get unclaimed register interrupts. This stops after we write - * anything to the VGA MSR register. The vgacon module uses this - * register all the time, so if we unbind our driver and, as a - * consequence, bind vgacon, we'll get stuck in an infinite loop at - * console_unlock(). So make here we touch the VGA MSR register, making - * sure vgacon can keep working normally without triggering interrupts - * and error messages. 
- */ - if (has_vga) { - vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(pdev, VGA_RSRC_LEGACY_IO); - } + if (has_vga) + intel_vga_reset_io_mem(dev_priv); if (irq_pipe_mask) gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask); @@ -578,6 +546,8 @@ static void icl_tc_port_assert_ref_held(struct drm_i915_private *dev_priv, #endif +#define TGL_AUX_PW_TO_TC_PORT(pw_idx) ((pw_idx) - TGL_PW_CTL_IDX_AUX_TC1) + static void icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) @@ -594,6 +564,17 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, I915_WRITE(DP_AUX_CH_CTL(aux_ch), val); hsw_power_well_enable(dev_priv, power_well); + + if (INTEL_GEN(dev_priv) >= 12 && !power_well->desc->hsw.is_tc_tbt) { + enum tc_port tc_port; + + tc_port = TGL_AUX_PW_TO_TC_PORT(power_well->desc->hsw.idx); + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port), + DKL_CMN_UC_DW27_UC_HEALTH, 1)) + DRM_WARN("Timeout waiting TC uC health\n"); + } } static void @@ -714,7 +695,11 @@ static u32 gen9_dc_mask(struct drm_i915_private *dev_priv) u32 mask; mask = DC_STATE_EN_UPTO_DC5; - if (INTEL_GEN(dev_priv) >= 11) + + if (INTEL_GEN(dev_priv) >= 12) + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6 + | DC_STATE_EN_DC9; + else if (IS_GEN(dev_priv, 11)) mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9; else if (IS_GEN9_LP(dev_priv)) mask |= DC_STATE_EN_DC9; @@ -784,6 +769,52 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state) dev_priv->csr.dc_state = val & mask; } +static u32 +sanitize_target_dc_state(struct drm_i915_private *dev_priv, + u32 target_dc_state) +{ + u32 states[] = { + DC_STATE_EN_UPTO_DC6, + DC_STATE_EN_UPTO_DC5, + DC_STATE_EN_DC3CO, + DC_STATE_DISABLE, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { + if (target_dc_state != states[i]) + continue; + + if (dev_priv->csr.allowed_dc_mask & target_dc_state) + break; + + target_dc_state = states[i + 1]; + } + + return target_dc_state; +} + +static void tgl_enable_dc3co(struct drm_i915_private *dev_priv) +{ + DRM_DEBUG_KMS("Enabling DC3CO\n"); + gen9_set_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_disable_dc3co(struct drm_i915_private *dev_priv) +{ + u32 val; + + DRM_DEBUG_KMS("Disabling DC3CO\n"); + val = I915_READ(DC_STATE_EN); + val &= ~DC_STATE_DC3CO_STATUS; + I915_WRITE(DC_STATE_EN, val); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + /* + * Delay of 200us DC3CO Exit time B.Spec 49196 + */ + usleep_range(200, 210); +} + static void bxt_enable_dc9(struct drm_i915_private *dev_priv) { assert_can_enable_dc9(dev_priv); @@ -839,6 +870,51 @@ lookup_power_well(struct drm_i915_private *dev_priv, return &dev_priv->power_domains.power_wells[0]; } +/** + * intel_display_power_set_target_dc_state - Set target dc state. + * @dev_priv: i915 device + * @state: state which needs to be set as target_dc_state. + * + * This function set the "DC off" power well target_dc_state, + * based upon this target_dc_stste, "DC off" power well will + * enable desired DC state. 
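sanitize_target_dc_state() above walks the DC6 -> DC5 -> DC3CO -> disabled ladder and degrades a requested target to the next state the platform actually allows. A stand-alone illustration of that fallback behaviour (the numeric values are stand-ins, not the real DC_STATE_* register bits):

#include <stdio.h>

enum { DISABLE = 0, DC3CO = 1, DC5 = 2, DC6 = 4 };	/* stand-in values */

static int sanitize(int allowed_mask, int target)
{
	const int states[] = { DC6, DC5, DC3CO, DISABLE };

	for (int i = 0; i < 3; i++) {
		if (target != states[i])
			continue;
		if (allowed_mask & target)
			break;
		target = states[i + 1];	/* fall back one level */
	}
	return target;
}

int main(void)
{
	/* platform allows DC5 only: a DC6 request degrades to DC5 */
	printf("%d\n", sanitize(DC5, DC6));	/* prints 2 */
	/* nothing allowed: every request degrades to DISABLE */
	printf("%d\n", sanitize(0, DC6));	/* prints 0 */
	return 0;
}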
+ */ +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state) +{ + struct i915_power_well *power_well; + bool dc_off_enabled; + struct i915_power_domains *power_domains = &dev_priv->power_domains; + + mutex_lock(&power_domains->lock); + power_well = lookup_power_well(dev_priv, SKL_DISP_DC_OFF); + + if (WARN_ON(!power_well)) + goto unlock; + + state = sanitize_target_dc_state(dev_priv, state); + + if (state == dev_priv->csr.target_dc_state) + goto unlock; + + dc_off_enabled = power_well->desc->ops->is_enabled(dev_priv, + power_well); + /* + * If DC off power well is disabled, need to enable and disable the + * DC off power well to effect target DC state. + */ + if (!dc_off_enabled) + power_well->desc->ops->enable(dev_priv, power_well); + + dev_priv->csr.target_dc_state = state; + + if (!dc_off_enabled) + power_well->desc->ops->disable(dev_priv, power_well); + +unlock: + mutex_unlock(&power_domains->lock); +} + static void assert_can_enable_dc5(struct drm_i915_private *dev_priv) { bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv, @@ -951,7 +1027,8 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0; + return ((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC3CO) == 0 && + (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0); } static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) @@ -967,6 +1044,11 @@ static void gen9_disable_dc_states(struct drm_i915_private *dev_priv) { struct intel_cdclk_state cdclk_state = {}; + if (dev_priv->csr.target_dc_state == DC_STATE_EN_DC3CO) { + tgl_disable_dc3co(dev_priv); + return; + } + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); dev_priv->display.get_cdclk(dev_priv, &cdclk_state); @@ -999,10 +1081,17 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, if (!dev_priv->csr.dmc_payload) return; - if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6) + switch (dev_priv->csr.target_dc_state) { + case DC_STATE_EN_DC3CO: + tgl_enable_dc3co(dev_priv); + break; + case DC_STATE_EN_UPTO_DC6: skl_enable_dc6(dev_priv); - else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5) + break; + case DC_STATE_EN_UPTO_DC5: gen9_enable_dc5(dev_priv); + break; + } } static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, @@ -1208,7 +1297,7 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) intel_crt_reset(&encoder->base); } - i915_redisable_vga_power_on(dev_priv); + intel_vga_redisable_power_on(dev_priv); intel_pps_unlock_regs_wa(dev_priv); } @@ -1718,15 +1807,12 @@ __async_put_domains_state_ok(struct i915_power_domains *power_domains) static void print_power_domains(struct i915_power_domains *power_domains, const char *prefix, u64 mask) { - struct drm_i915_private *i915 = - container_of(power_domains, struct drm_i915_private, - power_domains); enum intel_display_power_domain domain; DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask)); for_each_power_domain(domain, mask) DRM_DEBUG_DRIVER("%s use_count %d\n", - intel_display_power_domain_str(i915, domain), + intel_display_power_domain_str(domain), power_domains->domain_use_count[domain]); } @@ -1896,7 +1982,7 @@ __intel_display_power_put_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - 
const char *name = intel_display_power_domain_str(dev_priv, domain); + const char *name = intel_display_power_domain_str(domain); power_domains = &dev_priv->power_domains; @@ -2487,10 +2573,10 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_D) | \ BIT_ULL(POWER_DOMAIN_AUX_E) | \ BIT_ULL(POWER_DOMAIN_AUX_F) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -2530,22 +2616,22 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A)) #define ICL_AUX_B_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_B)) -#define ICL_AUX_C_IO_POWER_DOMAINS ( \ +#define ICL_AUX_C_TC1_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_C)) -#define ICL_AUX_D_IO_POWER_DOMAINS ( \ +#define ICL_AUX_D_TC2_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_D)) -#define ICL_AUX_E_IO_POWER_DOMAINS ( \ +#define ICL_AUX_E_TC3_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_E)) -#define ICL_AUX_F_IO_POWER_DOMAINS ( \ +#define ICL_AUX_F_TC4_IO_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_AUX_F)) -#define ICL_AUX_TBT1_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1)) -#define ICL_AUX_TBT2_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2)) -#define ICL_AUX_TBT3_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3)) -#define ICL_AUX_TBT4_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4)) +#define ICL_AUX_C_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C_TBT)) +#define ICL_AUX_D_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define ICL_AUX_E_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define ICL_AUX_F_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) #define TGL_PW_5_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PIPE_D) | \ @@ -2565,24 +2651,24 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_PIPE_B) | \ BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_LANES) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC4) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC5) | \ - BIT_ULL(POWER_DOMAIN_AUX_TC6) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT1) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT2) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT3) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT4) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT5) | \ - BIT_ULL(POWER_DOMAIN_AUX_TBT6) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_G_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUX_E) | \ + BIT_ULL(POWER_DOMAIN_AUX_F) | \ + BIT_ULL(POWER_DOMAIN_AUX_G) | \ + BIT_ULL(POWER_DOMAIN_AUX_H) | \ + BIT_ULL(POWER_DOMAIN_AUX_I) | \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT) | \ 
+ BIT_ULL(POWER_DOMAIN_AUX_F_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT) | \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -2598,35 +2684,50 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define TGL_DDI_IO_TC1_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC1_IO)) -#define TGL_DDI_IO_TC2_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC2_IO)) -#define TGL_DDI_IO_TC3_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC3_IO)) -#define TGL_DDI_IO_TC4_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC4_IO)) -#define TGL_DDI_IO_TC5_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC5_IO)) -#define TGL_DDI_IO_TC6_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_TC6_IO)) - -#define TGL_AUX_TC1_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC1)) -#define TGL_AUX_TC2_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC2)) -#define TGL_AUX_TC3_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC3)) -#define TGL_AUX_TC4_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC4)) -#define TGL_AUX_TC5_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC5)) -#define TGL_AUX_TC6_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TC6)) -#define TGL_AUX_TBT5_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT5)) -#define TGL_AUX_TBT6_IO_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_AUX_TBT6)) +#define TGL_DDI_IO_D_TC1_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO)) +#define TGL_DDI_IO_E_TC2_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO)) +#define TGL_DDI_IO_F_TC3_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO)) +#define TGL_DDI_IO_G_TC4_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_G_IO)) +#define TGL_DDI_IO_H_TC5_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_H_IO)) +#define TGL_DDI_IO_I_TC6_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_I_IO)) + +#define TGL_AUX_A_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_IO_A) | \ + BIT_ULL(POWER_DOMAIN_AUX_A)) +#define TGL_AUX_B_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_B)) +#define TGL_AUX_C_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_C)) +#define TGL_AUX_D_TC1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D)) +#define TGL_AUX_E_TC2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E)) +#define TGL_AUX_F_TC3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F)) +#define TGL_AUX_G_TC4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G)) +#define TGL_AUX_H_TC5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H)) +#define TGL_AUX_I_TC6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I)) +#define TGL_AUX_D_TBT1_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_D_TBT)) +#define TGL_AUX_E_TBT2_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_E_TBT)) +#define TGL_AUX_F_TBT3_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_F_TBT)) +#define TGL_AUX_G_TBT4_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_G_TBT)) +#define TGL_AUX_H_TBT5_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_H_TBT)) +#define TGL_AUX_I_TBT6_IO_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_AUX_I_TBT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_power_well_sync_hw_noop, @@ -2938,7 +3039,7 @@ static const struct i915_power_well_desc skl_power_wells[] = { .name = "DC off", .domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ 
-3020,7 +3121,7 @@ static const struct i915_power_well_desc bxt_power_wells[] = { .name = "DC off", .domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3080,7 +3181,7 @@ static const struct i915_power_well_desc glk_power_wells[] = { .name = "DC off", .domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3249,7 +3350,7 @@ static const struct i915_power_well_desc cnl_power_wells[] = { .name = "DC off", .domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3377,7 +3478,7 @@ static const struct i915_power_well_desc icl_power_wells[] = { .name = "DC off", .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3484,8 +3585,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX C", - .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .name = "AUX C TC1", + .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3495,8 +3596,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX D", - .domains = ICL_AUX_D_IO_POWER_DOMAINS, + .name = "AUX D TC2", + .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3506,8 +3607,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX E", - .domains = ICL_AUX_E_IO_POWER_DOMAINS, + .name = "AUX E TC3", + .domains = ICL_AUX_E_TC3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3517,8 +3618,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX F", - .domains = ICL_AUX_F_IO_POWER_DOMAINS, + .name = "AUX F TC4", + .domains = ICL_AUX_F_TC4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3528,8 +3629,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT1", - .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .name = "AUX C TBT1", + .domains = ICL_AUX_C_TBT1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3539,8 +3640,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT2", - .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .name = "AUX D TBT2", + .domains = ICL_AUX_D_TBT2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3550,8 +3651,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT3", - .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .name = "AUX E TBT3", + .domains = ICL_AUX_E_TBT3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3561,8 +3662,8 @@ static const struct i915_power_well_desc icl_power_wells[] = { }, }, { - .name = "AUX TBT4", - .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .name = "AUX F TBT4", + .domains = ICL_AUX_F_TBT4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3610,7 +3711,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { .name = "DC off", .domains = TGL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .id = 
DISP_PW_ID_NONE, + .id = SKL_DISP_DC_OFF, }, { .name = "power well 2", @@ -3667,8 +3768,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { } }, { - .name = "DDI TC1 IO", - .domains = TGL_DDI_IO_TC1_POWER_DOMAINS, + .name = "DDI D TC1 IO", + .domains = TGL_DDI_IO_D_TC1_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3677,8 +3778,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC2 IO", - .domains = TGL_DDI_IO_TC2_POWER_DOMAINS, + .name = "DDI E TC2 IO", + .domains = TGL_DDI_IO_E_TC2_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3687,8 +3788,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC3 IO", - .domains = TGL_DDI_IO_TC3_POWER_DOMAINS, + .name = "DDI F TC3 IO", + .domains = TGL_DDI_IO_F_TC3_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3697,8 +3798,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC4 IO", - .domains = TGL_DDI_IO_TC4_POWER_DOMAINS, + .name = "DDI G TC4 IO", + .domains = TGL_DDI_IO_G_TC4_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3707,8 +3808,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC5 IO", - .domains = TGL_DDI_IO_TC5_POWER_DOMAINS, + .name = "DDI H TC5 IO", + .domains = TGL_DDI_IO_H_TC5_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3717,8 +3818,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "DDI TC6 IO", - .domains = TGL_DDI_IO_TC6_POWER_DOMAINS, + .name = "DDI I TC6 IO", + .domains = TGL_DDI_IO_I_TC6_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3728,7 +3829,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX A", - .domains = ICL_AUX_A_IO_POWER_DOMAINS, + .domains = TGL_AUX_A_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3738,7 +3839,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX B", - .domains = ICL_AUX_B_IO_POWER_DOMAINS, + .domains = TGL_AUX_B_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3748,7 +3849,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, { .name = "AUX C", - .domains = ICL_AUX_C_IO_POWER_DOMAINS, + .domains = TGL_AUX_C_IO_POWER_DOMAINS, .ops = &icl_combo_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3757,8 +3858,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC1", - .domains = TGL_AUX_TC1_IO_POWER_DOMAINS, + .name = "AUX D TC1", + .domains = TGL_AUX_D_TC1_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3768,8 +3869,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC2", - .domains = TGL_AUX_TC2_IO_POWER_DOMAINS, + .name = "AUX E TC2", + .domains = TGL_AUX_E_TC2_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3779,8 +3880,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC3", - .domains = TGL_AUX_TC3_IO_POWER_DOMAINS, + .name = "AUX F TC3", + .domains = TGL_AUX_F_TC3_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3790,8 +3891,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC4", - .domains = 
TGL_AUX_TC4_IO_POWER_DOMAINS, + .name = "AUX G TC4", + .domains = TGL_AUX_G_TC4_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3801,8 +3902,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC5", - .domains = TGL_AUX_TC5_IO_POWER_DOMAINS, + .name = "AUX H TC5", + .domains = TGL_AUX_H_TC5_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3812,8 +3913,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TC6", - .domains = TGL_AUX_TC6_IO_POWER_DOMAINS, + .name = "AUX I TC6", + .domains = TGL_AUX_I_TC6_IO_POWER_DOMAINS, .ops = &icl_tc_phy_aux_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3823,8 +3924,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT1", - .domains = ICL_AUX_TBT1_IO_POWER_DOMAINS, + .name = "AUX D TBT1", + .domains = TGL_AUX_D_TBT1_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3834,8 +3935,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT2", - .domains = ICL_AUX_TBT2_IO_POWER_DOMAINS, + .name = "AUX E TBT2", + .domains = TGL_AUX_E_TBT2_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3845,8 +3946,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT3", - .domains = ICL_AUX_TBT3_IO_POWER_DOMAINS, + .name = "AUX F TBT3", + .domains = TGL_AUX_F_TBT3_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3856,8 +3957,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT4", - .domains = ICL_AUX_TBT4_IO_POWER_DOMAINS, + .name = "AUX G TBT4", + .domains = TGL_AUX_G_TBT4_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3867,8 +3968,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT5", - .domains = TGL_AUX_TBT5_IO_POWER_DOMAINS, + .name = "AUX H TBT5", + .domains = TGL_AUX_H_TBT5_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3878,8 +3979,8 @@ static const struct i915_power_well_desc tgl_power_wells[] = { }, }, { - .name = "AUX TBT6", - .domains = TGL_AUX_TBT6_IO_POWER_DOMAINS, + .name = "AUX I TBT6", + .domains = TGL_AUX_I_TBT6_IO_POWER_DOMAINS, .ops = &hsw_power_well_ops, .id = DISP_PW_ID_NONE, { @@ -3931,14 +4032,17 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (INTEL_GEN(dev_priv) >= 11) { - max_dc = 2; + if (INTEL_GEN(dev_priv) >= 12) { + max_dc = 4; /* * DC9 has a separate HW flow from the rest of the DC states, * not depending on the DMC firmware. It's needed by system * suspend/resume, so allow it unconditionally. 
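With the gen12 branch above (max_dc = 4), the i915.enable_dc module parameter now accepts values up to 4. A worked mapping of the resulting allowed-DC mask, derived from this function and the requested_dc switch that follows (assuming a gen12 platform where max_dc == 4; other platforms clamp the request to their own max_dc):

	enable_dc=0  -> DC_STATE_EN_DC9 only (no runtime DC5/DC6/DC3CO)
	enable_dc=1  -> DC_STATE_EN_DC9 | DC_STATE_EN_UPTO_DC5
	enable_dc=2  -> DC_STATE_EN_DC9 | DC_STATE_EN_UPTO_DC6
	enable_dc=3  -> DC_STATE_EN_DC9 | DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5
	enable_dc=4  -> DC_STATE_EN_DC9 | DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6
	enable_dc=-1 -> treated as the platform maximum, i.e. the same as 4 here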
*/ mask = DC_STATE_EN_DC9; + } else if (IS_GEN(dev_priv, 11)) { + max_dc = 2; + mask = DC_STATE_EN_DC9; } else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; @@ -3957,7 +4061,7 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = enable_dc; } else if (enable_dc == -1) { requested_dc = max_dc; - } else if (enable_dc > max_dc && enable_dc <= 2) { + } else if (enable_dc > max_dc && enable_dc <= 4) { DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n", enable_dc, max_dc); requested_dc = max_dc; @@ -3966,10 +4070,20 @@ static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv, requested_dc = max_dc; } - if (requested_dc > 1) + switch (requested_dc) { + case 4: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC6; + break; + case 3: + mask |= DC_STATE_EN_DC3CO | DC_STATE_EN_UPTO_DC5; + break; + case 2: mask |= DC_STATE_EN_UPTO_DC6; - if (requested_dc > 0) + break; + case 1: mask |= DC_STATE_EN_UPTO_DC5; + break; + } DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask); @@ -4030,6 +4144,9 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc); + dev_priv->csr.target_dc_state = + sanitize_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); @@ -5104,8 +5221,7 @@ static void intel_power_domains_dump_info(struct drm_i915_private *i915) for_each_power_domain(domain, power_well->desc->domains) DRM_DEBUG_DRIVER(" %-23s %d\n", - intel_display_power_domain_str(i915, - domain), + intel_display_power_domain_str(domain), power_domains->domain_use_count[domain]); } } diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h index a50605b8b1ad..1da04f3e0fb3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.h +++ b/drivers/gpu/drm/i915/display/intel_display_power.h @@ -36,29 +36,20 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_B_LANES, POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, - POWER_DOMAIN_PORT_DDI_TC1_LANES = POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, - POWER_DOMAIN_PORT_DDI_TC2_LANES = POWER_DOMAIN_PORT_DDI_E_LANES, POWER_DOMAIN_PORT_DDI_F_LANES, - POWER_DOMAIN_PORT_DDI_TC3_LANES = POWER_DOMAIN_PORT_DDI_F_LANES, - POWER_DOMAIN_PORT_DDI_TC4_LANES, - POWER_DOMAIN_PORT_DDI_TC5_LANES, - POWER_DOMAIN_PORT_DDI_TC6_LANES, + POWER_DOMAIN_PORT_DDI_G_LANES, + POWER_DOMAIN_PORT_DDI_H_LANES, + POWER_DOMAIN_PORT_DDI_I_LANES, POWER_DOMAIN_PORT_DDI_A_IO, POWER_DOMAIN_PORT_DDI_B_IO, POWER_DOMAIN_PORT_DDI_C_IO, POWER_DOMAIN_PORT_DDI_D_IO, - POWER_DOMAIN_PORT_DDI_TC1_IO = POWER_DOMAIN_PORT_DDI_D_IO, POWER_DOMAIN_PORT_DDI_E_IO, - POWER_DOMAIN_PORT_DDI_TC2_IO = POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DDI_F_IO, - POWER_DOMAIN_PORT_DDI_TC3_IO = POWER_DOMAIN_PORT_DDI_F_IO, POWER_DOMAIN_PORT_DDI_G_IO, - POWER_DOMAIN_PORT_DDI_TC4_IO = POWER_DOMAIN_PORT_DDI_G_IO, POWER_DOMAIN_PORT_DDI_H_IO, - POWER_DOMAIN_PORT_DDI_TC5_IO = POWER_DOMAIN_PORT_DDI_H_IO, POWER_DOMAIN_PORT_DDI_I_IO, - POWER_DOMAIN_PORT_DDI_TC6_IO = POWER_DOMAIN_PORT_DDI_I_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, @@ -68,21 +59,19 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, POWER_DOMAIN_AUX_D, - POWER_DOMAIN_AUX_TC1 = POWER_DOMAIN_AUX_D, POWER_DOMAIN_AUX_E, - POWER_DOMAIN_AUX_TC2 = POWER_DOMAIN_AUX_E, POWER_DOMAIN_AUX_F, - 
POWER_DOMAIN_AUX_TC3 = POWER_DOMAIN_AUX_F, - POWER_DOMAIN_AUX_TC4, - POWER_DOMAIN_AUX_TC5, - POWER_DOMAIN_AUX_TC6, + POWER_DOMAIN_AUX_G, + POWER_DOMAIN_AUX_H, + POWER_DOMAIN_AUX_I, POWER_DOMAIN_AUX_IO_A, - POWER_DOMAIN_AUX_TBT1, - POWER_DOMAIN_AUX_TBT2, - POWER_DOMAIN_AUX_TBT3, - POWER_DOMAIN_AUX_TBT4, - POWER_DOMAIN_AUX_TBT5, - POWER_DOMAIN_AUX_TBT6, + POWER_DOMAIN_AUX_C_TBT, + POWER_DOMAIN_AUX_D_TBT, + POWER_DOMAIN_AUX_E_TBT, + POWER_DOMAIN_AUX_F_TBT, + POWER_DOMAIN_AUX_G_TBT, + POWER_DOMAIN_AUX_H_TBT, + POWER_DOMAIN_AUX_I_TBT, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, POWER_DOMAIN_GT_IRQ, @@ -111,6 +100,7 @@ enum i915_power_well_id { SKL_DISP_PW_MISC_IO, SKL_DISP_PW_1, SKL_DISP_PW_2, + SKL_DISP_DC_OFF, }; #define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) @@ -267,10 +257,11 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915); void intel_display_power_resume_early(struct drm_i915_private *i915); void intel_display_power_suspend(struct drm_i915_private *i915); void intel_display_power_resume(struct drm_i915_private *i915); +void intel_display_power_set_target_dc_state(struct drm_i915_private *dev_priv, + u32 state); const char * -intel_display_power_domain_str(struct drm_i915_private *i915, - enum intel_display_power_domain domain); +intel_display_power_domain_str(enum intel_display_power_domain domain); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 449abaea619f..8358152e403e 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -388,6 +388,13 @@ struct intel_hdcp { wait_queue_head_t cp_irq_queue; atomic_t cp_irq_count; int cp_irq_count_cached; + + /* + * HDCP register access for gen12+ need the transcoder associated. + * Transcoder attached to the connector could be changed at modeset. + * Hence caching the transcoder here. + */ + enum transcoder cpu_transcoder; }; struct intel_connector { @@ -481,9 +488,9 @@ struct intel_atomic_state { * but the converse is not necessarily true; simply changing a mode may * not flip the final active status of any CRTC's */ - unsigned int active_pipe_changes; + u8 active_pipe_changes; - unsigned int active_crtcs; + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ @@ -552,24 +559,24 @@ struct intel_plane_state { int scaler_id; /* - * linked_plane: + * planar_linked_plane: * * ICL planar formats require 2 planes that are updated as pairs. * This member is used to make sure the other plane is also updated * when required, and for update_slave() to find the correct * plane_state to pass as argument. */ - struct intel_plane *linked_plane; + struct intel_plane *planar_linked_plane; /* - * slave: + * planar_slave: * If set don't update use the linked plane's state for updating * this plane during atomic commit with the update_slave() callback. * * It's also used by the watermark code to ignore wm calculations on * this plane. They're calculated by the linked plane's wm code. */ - u32 slave; + u32 planar_slave; struct drm_intel_sprite_colorkey ckey; }; @@ -759,7 +766,6 @@ struct intel_crtc_state { bool update_pipe; /* can a fast modeset be performed? 
*/ bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ - bool fb_changed; /* fb on any of the planes is changed */ bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) @@ -864,6 +870,7 @@ struct intel_crtc_state { bool has_psr; bool has_psr2; + u32 dc3co_exitline; /* * Frequence the dpll for the port should run at. Differs from the @@ -984,6 +991,12 @@ struct intel_crtc_state { /* Forward Error correction State */ bool fec_enable; + + /* Pointer to master transcoder in case of tiled displays */ + enum transcoder master_transcoder; + + /* Bitmask to indicate slaves attached */ + u8 sync_mode_slaves_mask; }; struct intel_crtc { @@ -1026,6 +1039,9 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; + + /* per pipe DSB related info */ + struct intel_dsb dsb; }; struct intel_plane { @@ -1176,6 +1192,7 @@ struct intel_dp { /* sink or branch descriptor */ struct drm_dp_desc desc; struct drm_dp_aux aux; + u32 aux_busy_last_status; u8 train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -1211,6 +1228,15 @@ struct intel_dp { bool can_mst; /* this port supports mst */ bool is_mst; int active_mst_links; + + /* + * DP_TP_* registers may be either on port or transcoder register space. + */ + struct { + i915_reg_t dp_tp_ctl; + i915_reg_t dp_tp_status; + } regs; + /* connector directly attached - won't be use for modeset in mst world */ struct intel_connector *attached_connector; @@ -1269,6 +1295,7 @@ struct intel_digital_port { char tc_port_name[8]; enum tc_port_mode tc_mode; enum phy_fia tc_phy_fia; + u8 tc_phy_fia_idx; void (*write_infoframe)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, @@ -1509,7 +1536,7 @@ intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) drm_wait_one_vblank(&dev_priv->drm, pipe); } static inline void -intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) +intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, enum pipe pipe) { const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2950e9308a4f..5eeafa45831a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -68,18 +68,13 @@ #define DP_DPRX_ESI_LEN 14 -/* DP DSC small joiner has 2 FIFOs each of 640 x 6 bytes */ -#define DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER 61440 -#define DP_DSC_MIN_SUPPORTED_BPC 8 -#define DP_DSC_MAX_SUPPORTED_BPC 10 - /* DP DSC throughput values used for slice count calculations KPixels/s */ #define DP_DSC_PEAK_PIXEL_RATE 2720000 #define DP_DSC_MAX_ENC_THROUGHPUT_0 340000 #define DP_DSC_MAX_ENC_THROUGHPUT_1 400000 -/* DP DSC FEC Overhead factor = (100 - 2.4)/100 */ -#define DP_DSC_FEC_OVERHEAD_FACTOR 976 +/* DP DSC FEC Overhead factor = 1/(0.972261) */ +#define DP_DSC_FEC_OVERHEAD_FACTOR 972261 /* Compliance test status bits */ #define INTEL_DP_RESOLUTION_SHIFT_MASK 0 @@ -494,6 +489,127 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, return 0; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock) +{ + return div_u64(mul_u32_u32(mode_clock, 1000000U), + DP_DSC_FEC_OVERHEAD_FACTOR); +} + +static int +small_joiner_ram_size_bits(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) + return 7680 * 8; + else + return 6144 * 8; +} + +static u16 intel_dp_dsc_get_output_bpp(struct drm_i915_private *i915, + u32 link_clock, u32 
lane_count, + u32 mode_clock, u32 mode_hdisplay) +{ + u32 bits_per_pixel, max_bpp_small_joiner_ram; + int i; + + /* + * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* + * (LinkSymbolClock)* 8 * (TimeSlotsPerMTP) + * for SST -> TimeSlotsPerMTP is 1, + * for MST -> TimeSlotsPerMTP has to be calculated + */ + bits_per_pixel = (link_clock * lane_count * 8) / + intel_dp_mode_to_fec_clock(mode_clock); + DRM_DEBUG_KMS("Max link bpp: %u\n", bits_per_pixel); + + /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ + max_bpp_small_joiner_ram = small_joiner_ram_size_bits(i915) / + mode_hdisplay; + DRM_DEBUG_KMS("Max small joiner bpp: %u\n", max_bpp_small_joiner_ram); + + /* + * Greatest allowed DSC BPP = MIN (output BPP from available Link BW + * check, output bpp from small joiner RAM check) + */ + bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); + + /* Error out if the max bpp is less than smallest allowed valid bpp */ + if (bits_per_pixel < valid_dsc_bpp[0]) { + DRM_DEBUG_KMS("Unsupported BPP %u, min %u\n", + bits_per_pixel, valid_dsc_bpp[0]); + return 0; + } + + /* Find the nearest match in the array of known BPPs from VESA */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { + if (bits_per_pixel < valid_dsc_bpp[i + 1]) + break; + } + bits_per_pixel = valid_dsc_bpp[i]; + + /* + * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, + * fractional part is 0 + */ + return bits_per_pixel << 4; +} + +static u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, + int mode_clock, int mode_hdisplay) +{ + u8 min_slice_count, i; + int max_slice_width; + + if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_0); + else + min_slice_count = DIV_ROUND_UP(mode_clock, + DP_DSC_MAX_ENC_THROUGHPUT_1); + + max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); + if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { + DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", + max_slice_width); + return 0; + } + /* Also take into account max slice width */ + min_slice_count = min_t(u8, min_slice_count, + DIV_ROUND_UP(mode_hdisplay, + max_slice_width)); + + /* Find the closest match to the valid slice count values */ + for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { + if (valid_dsc_slicecount[i] > + drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, + false)) + break; + if (min_slice_count <= valid_dsc_slicecount[i]) + return valid_dsc_slicecount[i]; + } + + DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); + return 0; +} + +static bool intel_dp_hdisplay_bad(struct drm_i915_private *dev_priv, + int hdisplay) +{ + /* + * Older platforms don't like hdisplay==4096 with DP. + * + * On ILK/SNB/IVB the pipe seems to be somewhat running (scanline + * and frame counter increment), but we don't get vblank interrupts, + * and the pipe underruns immediately. The link also doesn't seem + * to get trained properly. + * + * On CHV the vblank interrupts don't seem to disappear but + * otherwise the symptoms are similar. 
+ * + * TODO: confirm the behaviour on HSW+ + */ + return hdisplay == 4096 && !HAS_DDI(dev_priv); +} + static enum drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -529,6 +645,9 @@ intel_dp_mode_valid(struct drm_connector *connector, max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes); mode_rate = intel_dp_link_required(target_clock, 18); + if (intel_dp_hdisplay_bad(dev_priv, mode->hdisplay)) + return MODE_H_ILLEGAL; + /* * Output bpp is stored in 6.4 format so right shift by 4 to get the * integer value since we support only integer values of bpp. @@ -543,7 +662,8 @@ intel_dp_mode_valid(struct drm_connector *connector, true); } else if (drm_dp_sink_supports_fec(intel_dp->fec_capable)) { dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(max_link_clock, + intel_dp_dsc_get_output_bpp(dev_priv, + max_link_clock, max_lanes, target_clock, mode->hdisplay) >> 4; @@ -564,7 +684,7 @@ intel_dp_mode_valid(struct drm_connector *connector, if (mode->flags & DRM_MODE_FLAG_DBLCLK) return MODE_H_ILLEGAL; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } u32 intel_dp_pack_aux(const u8 *src, int src_bytes) @@ -641,12 +761,14 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) u32 DP; if (WARN(I915_READ(intel_dp->output_reg) & DP_PORT_EN, - "skipping pipe %c power sequencer kick due to port %c being active\n", - pipe_name(pipe), port_name(intel_dig_port->base.port))) + "skipping pipe %c power sequencer kick due to [ENCODER:%d:%s] being active\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name)) return; - DRM_DEBUG_KMS("kicking pipe %c power sequencer for port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("kicking pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* Preserve the BIOS-computed detected bit. This is * supposed to be read-only. @@ -764,9 +886,10 @@ vlv_power_sequencer_pipe(struct intel_dp *intel_dp) vlv_steal_power_sequencer(dev_priv, pipe); intel_dp->pps_pipe = pipe; - DRM_DEBUG_KMS("picked pipe %c power sequencer for port %c\n", + DRM_DEBUG_KMS("picked pipe %c power sequencer for [ENCODER:%d:%s]\n", pipe_name(intel_dp->pps_pipe), - port_name(intel_dig_port->base.port)); + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -874,13 +997,16 @@ vlv_initial_power_sequencer_setup(struct intel_dp *intel_dp) /* didn't find one? 
just let vlv_power_sequencer_pipe() pick one when needed */ if (intel_dp->pps_pipe == INVALID_PIPE) { - DRM_DEBUG_KMS("no initial power sequencer for port %c\n", - port_name(port)); + DRM_DEBUG_KMS("no initial power sequencer for [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return; } - DRM_DEBUG_KMS("initial power sequencer for port %c: pipe %c\n", - port_name(port), pipe_name(intel_dp->pps_pipe)); + DRM_DEBUG_KMS("initial power sequencer for [ENCODER:%d:%s]: pipe %c\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, + pipe_name(intel_dp->pps_pipe)); intel_dp_init_panel_power_sequencer(intel_dp); intel_dp_init_panel_power_sequencer_registers(intel_dp, false); @@ -1243,13 +1369,12 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, trace_i915_reg_rw(false, ch_ctl, status, sizeof(status), true); if (try == 3) { - static u32 last_status = -1; const u32 status = intel_uncore_read(uncore, ch_ctl); - if (status != last_status) { + if (status != intel_dp->aux_busy_last_status) { WARN(1, "dp_aux_ch not started status 0x%08x\n", status); - last_status = status; + intel_dp->aux_busy_last_status = status; } ret = -EBUSY; @@ -1541,6 +1666,7 @@ static i915_reg_t skl_aux_ctl_reg(struct intel_dp *intel_dp) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_CTL(aux_ch); default: MISSING_CASE(aux_ch); @@ -1561,6 +1687,7 @@ static i915_reg_t skl_aux_data_reg(struct intel_dp *intel_dp, int index) case AUX_CH_D: case AUX_CH_E: case AUX_CH_F: + case AUX_CH_G: return DP_AUX_CH_DATA(aux_ch, index); default: MISSING_CASE(aux_ch); @@ -1739,8 +1866,14 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - return INTEL_GEN(dev_priv) >= 11 && - pipe_config->cpu_transcoder != TRANSCODER_A; + /* On TGL, FEC is supported on all Pipes */ + if (INTEL_GEN(dev_priv) >= 12) + return true; + + if (IS_GEN(dev_priv, 11) && pipe_config->cpu_transcoder != TRANSCODER_A) + return true; + + return false; } static bool intel_dp_supports_fec(struct intel_dp *intel_dp, @@ -1755,8 +1888,15 @@ static bool intel_dp_source_supports_dsc(struct intel_dp *intel_dp, { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - return INTEL_GEN(dev_priv) >= 10 && - pipe_config->cpu_transcoder != TRANSCODER_A; + /* On TGL, DSC is supported on all Pipes */ + if (INTEL_GEN(dev_priv) >= 12) + return true; + + if (INTEL_GEN(dev_priv) >= 10 && + pipe_config->cpu_transcoder != TRANSCODER_A) + return true; + + return false; } static bool intel_dp_supports_dsc(struct intel_dp *intel_dp, @@ -1915,11 +2055,17 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, if (!intel_dp_supports_dsc(intel_dp, pipe_config)) return -EINVAL; - dsc_max_bpc = min_t(u8, DP_DSC_MAX_SUPPORTED_BPC, - conn_state->max_requested_bpc); + /* Max DSC Input BPC for ICL is 10 and for TGL+ is 12 */ + if (INTEL_GEN(dev_priv) >= 12) + dsc_max_bpc = min_t(u8, 12, conn_state->max_requested_bpc); + else + dsc_max_bpc = min_t(u8, 10, + conn_state->max_requested_bpc); pipe_bpp = intel_dp_dsc_compute_bpp(intel_dp, dsc_max_bpc); - if (pipe_bpp < DP_DSC_MIN_SUPPORTED_BPC * 3) { + + /* Min Input BPC for ICL+ is 8 */ + if (pipe_bpp < 8 * 3) { DRM_DEBUG_KMS("No DSC support for less than 8bpc\n"); return -EINVAL; } @@ -1945,7 +2091,8 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp, u8 dsc_dp_slice_count; dsc_max_output_bpp = - intel_dp_dsc_get_output_bpp(pipe_config->port_clock, + 
intel_dp_dsc_get_output_bpp(dev_priv, + pipe_config->port_clock, pipe_config->lane_count, adjusted_mode->crtc_clock, adjusted_mode->crtc_hdisplay); @@ -2127,6 +2274,16 @@ bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: @@ -2164,6 +2321,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; + if (lspcon->active) lspcon_ycbcr420_config(&intel_connector->base, pipe_config); else @@ -2209,6 +2367,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return -EINVAL; + if (intel_dp_hdisplay_bad(dev_priv, adjusted_mode->crtc_hdisplay)) + return -EINVAL; + ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state); if (ret < 0) return ret; @@ -2226,7 +2387,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, - constant_n); + constant_n, pipe_config->fec_enable); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -2236,7 +2397,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, &pipe_config->dp_m2_n2, - constant_n); + constant_n, pipe_config->fec_enable); } if (!HAS_DDI(dev_priv)) @@ -2244,6 +2405,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_psr_compute_config(intel_dp, pipe_config); + intel_hdcp_transcoder_config(intel_connector, + pipe_config->cpu_transcoder); + return 0; } @@ -2271,6 +2435,9 @@ static void intel_dp_prepare(struct intel_encoder *encoder, intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); + intel_dp->regs.dp_tp_ctl = DP_TP_CTL(port); + intel_dp->regs.dp_tp_status = DP_TP_STATUS(port); + /* * There are four kinds of DP registers: * @@ -2472,8 +2639,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) intel_display_power_get(dev_priv, intel_aux_power_domain(intel_dig_port)); - DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD on\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); if (!edp_have_panel_power(intel_dp)) wait_panel_power_cycle(intel_dp); @@ -2492,8 +2660,9 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) * If the panel wasn't on, delay before accessing aux channel */ if (!edp_have_panel_power(intel_dp)) { - DRM_DEBUG_KMS("eDP port %c panel power wasn't enabled\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] panel power wasn't enabled\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); msleep(intel_dp->panel_power_up_delay); } @@ -2518,8 +2687,9 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp) vdd = false; with_pps_lock(intel_dp, wakeref) vdd = edp_panel_vdd_on(intel_dp); - I915_STATE_WARN(!vdd, "eDP port %c VDD already requested on\n", - 
port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!vdd, "[ENCODER:%d:%s] VDD already requested on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); } static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) @@ -2537,8 +2707,9 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if (!edp_have_panel_vdd(intel_dp)) return; - DRM_DEBUG_KMS("Turning eDP port %c VDD off\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("Turning [ENCODER:%d:%s] VDD off\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); pp = ironlake_get_pp_control(intel_dp); pp &= ~EDP_FORCE_VDD; @@ -2600,8 +2771,9 @@ static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync) if (!intel_dp_is_edp(intel_dp)) return; - I915_STATE_WARN(!intel_dp->want_panel_vdd, "eDP port %c VDD not forced on", - port_name(dp_to_dig_port(intel_dp)->base.port)); + I915_STATE_WARN(!intel_dp->want_panel_vdd, "[ENCODER:%d:%s] VDD not forced on", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); intel_dp->want_panel_vdd = false; @@ -2622,12 +2794,14 @@ static void edp_panel_on(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power on\n", - port_name(dp_to_dig_port(intel_dp)->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name); if (WARN(edp_have_panel_power(intel_dp), - "eDP port %c panel power already on\n", - port_name(dp_to_dig_port(intel_dp)->base.port))) + "[ENCODER:%d:%s] panel power already on\n", + dp_to_dig_port(intel_dp)->base.base.base.id, + dp_to_dig_port(intel_dp)->base.base.name)) return; wait_panel_power_cycle(intel_dp); @@ -2682,11 +2856,11 @@ static void edp_panel_off(struct intel_dp *intel_dp) if (!intel_dp_is_edp(intel_dp)) return; - DRM_DEBUG_KMS("Turn eDP port %c panel power off\n", - port_name(dig_port->base.port)); + DRM_DEBUG_KMS("Turn [ENCODER:%d:%s] panel power off\n", + dig_port->base.base.base.id, dig_port->base.base.name); - WARN(!intel_dp->want_panel_vdd, "Need eDP port %c VDD to turn off panel\n", - port_name(dig_port->base.port)); + WARN(!intel_dp->want_panel_vdd, "Need [ENCODER:%d:%s] VDD to turn off panel\n", + dig_port->base.base.base.id, dig_port->base.base.name); pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some @@ -2831,8 +3005,8 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state) bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN; I915_STATE_WARN(cur_state != state, - "DP port %c state assertion failure (expected %s, current %s)\n", - port_name(dig_port->base.port), + "[ENCODER:%d:%s] state assertion failure (expected %s, current %s)\n", + dig_port->base.base.base.id, dig_port->base.base.name, onoff(state), onoff(cur_state)); } #define assert_dp_port_disabled(d) assert_dp_port((d), false) @@ -3220,7 +3394,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, dp_train_pat & train_pat_mask); if (HAS_DDI(dev_priv)) { - u32 temp = I915_READ(DP_TP_CTL(port)); + u32 temp = I915_READ(intel_dp->regs.dp_tp_ctl); if (dp_train_pat & DP_LINK_SCRAMBLING_DISABLE) temp |= DP_TP_CTL_SCRAMBLE_DISABLE; @@ -3246,7 +3420,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, temp |= DP_TP_CTL_LINK_TRAIN_PAT4; break; } - I915_WRITE(DP_TP_CTL(port), temp); + I915_WRITE(intel_dp->regs.dp_tp_ctl, temp); } else if 
((IS_IVYBRIDGE(dev_priv) && port == PORT_A) || (HAS_PCH_CPT(dev_priv) && port != PORT_A)) { @@ -3410,8 +3584,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) * port select always when logically disconnecting a power sequencer * from a port. */ - DRM_DEBUG_KMS("detaching pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("detaching pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); I915_WRITE(pp_on_reg, 0); POSTING_READ(pp_on_reg); @@ -3427,17 +3602,18 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, for_each_intel_dp(&dev_priv->drm, encoder) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - enum port port = encoder->port; WARN(intel_dp->active_pipe == pipe, - "stealing pipe %c power sequencer from active (e)DP port %c\n", - pipe_name(pipe), port_name(port)); + "stealing pipe %c power sequencer from active [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); if (intel_dp->pps_pipe != pipe) continue; - DRM_DEBUG_KMS("stealing pipe %c power sequencer from port %c\n", - pipe_name(pipe), port_name(port)); + DRM_DEBUG_KMS("stealing pipe %c power sequencer from [ENCODER:%d:%s]\n", + pipe_name(pipe), encoder->base.base.id, + encoder->base.name); /* make sure vdd is off before we steal it */ vlv_detach_power_sequencer(intel_dp); @@ -3479,8 +3655,9 @@ static void vlv_init_panel_power_sequencer(struct intel_encoder *encoder, /* now it's all ours */ intel_dp->pps_pipe = crtc->pipe; - DRM_DEBUG_KMS("initializing pipe %c power sequencer for port %c\n", - pipe_name(intel_dp->pps_pipe), port_name(encoder->port)); + DRM_DEBUG_KMS("initializing pipe %c power sequencer for [ENCODER:%d:%s]\n", + pipe_name(intel_dp->pps_pipe), encoder->base.base.id, + encoder->base.name); /* init power sequencer on this pipe and port */ intel_dp_init_panel_power_sequencer(intel_dp); @@ -3944,22 +4121,22 @@ void intel_dp_set_idle_link_train(struct intel_dp *intel_dp) if (!HAS_DDI(dev_priv)) return; - val = I915_READ(DP_TP_CTL(port)); + val = I915_READ(intel_dp->regs.dp_tp_ctl); val &= ~DP_TP_CTL_LINK_TRAIN_MASK; val |= DP_TP_CTL_LINK_TRAIN_IDLE; - I915_WRITE(DP_TP_CTL(port), val); + I915_WRITE(intel_dp->regs.dp_tp_ctl, val); /* - * On PORT_A we can have only eDP in SST mode. There the only reason - * we need to set idle transmission mode is to work around a HW issue - * where we enable the pipe while not in idle link-training mode. + * Until TGL on PORT_A we can have only eDP in SST mode. There the only + * reason we need to set idle transmission mode is to work around a HW + * issue where we enable the pipe while not in idle link-training mode. * In this case there is requirement to wait for a minimum number of * idle patterns to be sent. */ - if (port == PORT_A) + if (port == PORT_A && INTEL_GEN(dev_priv) < 12) return; - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_IDLE_DONE, 1)) DRM_ERROR("Timed out waiting for DP idle patterns\n"); } @@ -4301,9 +4478,10 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) &dp_to_dig_port(intel_dp)->base; bool sink_can_mst = intel_dp_sink_can_mst(intel_dp); - DRM_DEBUG_KMS("MST support? 
port %c: %s, sink: %s, modparam: %s\n", - port_name(encoder->port), yesno(intel_dp->can_mst), - yesno(sink_can_mst), yesno(i915_modparams.enable_dp_mst)); + DRM_DEBUG_KMS("[ENCODER:%d:%s] MST support: port: %s, sink: %s, modparam: %s\n", + encoder->base.base.id, encoder->base.name, + yesno(intel_dp->can_mst), yesno(sink_can_mst), + yesno(i915_modparams.enable_dp_mst)); if (!intel_dp->can_mst) return; @@ -4323,94 +4501,36 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) DP_DPRX_ESI_LEN; } -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay) +bool +intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - u16 bits_per_pixel, max_bpp_small_joiner_ram; - int i; - /* - * Available Link Bandwidth(Kbits/sec) = (NumberOfLanes)* - * (LinkSymbolClock)* 8 * ((100-FECOverhead)/100)*(TimeSlotsPerMTP) - * FECOverhead = 2.4%, for SST -> TimeSlotsPerMTP is 1, - * for MST -> TimeSlotsPerMTP has to be calculated + * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication + * of Color Encoding Format and Content Color Gamut], in order to + * sending YCBCR 420 or HDR BT.2020 signals we should use DP VSC SDP. */ - bits_per_pixel = (link_clock * lane_count * 8 * - DP_DSC_FEC_OVERHEAD_FACTOR) / - mode_clock; - - /* Small Joiner Check: output bpp <= joiner RAM (bits) / Horiz. width */ - max_bpp_small_joiner_ram = DP_DSC_MAX_SMALL_JOINER_RAM_BUFFER / - mode_hdisplay; - - /* - * Greatest allowed DSC BPP = MIN (output BPP from avaialble Link BW - * check, output bpp from small joiner RAM check) - */ - bits_per_pixel = min(bits_per_pixel, max_bpp_small_joiner_ram); - - /* Error out if the max bpp is less than smallest allowed valid bpp */ - if (bits_per_pixel < valid_dsc_bpp[0]) { - DRM_DEBUG_KMS("Unsupported BPP %d\n", bits_per_pixel); - return 0; - } - - /* Find the nearest match in the array of known BPPs from VESA */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp) - 1; i++) { - if (bits_per_pixel < valid_dsc_bpp[i + 1]) - break; - } - bits_per_pixel = valid_dsc_bpp[i]; - - /* - * Compressed BPP in U6.4 format so multiply by 16, for Gen 11, - * fractional part is 0 - */ - return bits_per_pixel << 4; -} - -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, - int mode_clock, - int mode_hdisplay) -{ - u8 min_slice_count, i; - int max_slice_width; - - if (mode_clock <= DP_DSC_PEAK_PIXEL_RATE) - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_0); - else - min_slice_count = DIV_ROUND_UP(mode_clock, - DP_DSC_MAX_ENC_THROUGHPUT_1); - - max_slice_width = drm_dp_dsc_sink_max_slice_width(intel_dp->dsc_dpcd); - if (max_slice_width < DP_DSC_MIN_SLICE_WIDTH_VALUE) { - DRM_DEBUG_KMS("Unsupported slice width %d by DP DSC Sink device\n", - max_slice_width); - return 0; - } - /* Also take into account max slice width */ - min_slice_count = min_t(u8, min_slice_count, - DIV_ROUND_UP(mode_hdisplay, - max_slice_width)); + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + return true; - /* Find the closest match to the valid slice count values */ - for (i = 0; i < ARRAY_SIZE(valid_dsc_slicecount); i++) { - if (valid_dsc_slicecount[i] > - drm_dp_dsc_sink_max_slice_count(intel_dp->dsc_dpcd, - false)) - break; - if (min_slice_count <= valid_dsc_slicecount[i]) - return valid_dsc_slicecount[i]; + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_SYCC_601: + case DRM_MODE_COLORIMETRY_OPYCC_601: + case DRM_MODE_COLORIMETRY_BT2020_YCC: + case 
DRM_MODE_COLORIMETRY_BT2020_RGB: + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + return true; + default: + break; } - DRM_DEBUG_KMS("Unsupported Slice Count %d\n", min_slice_count); - return 0; + return false; } static void -intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct dp_sdp vsc_sdp = {}; @@ -4431,13 +4551,55 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, */ vsc_sdp.sdp_header.HB3 = 0x13; - /* - * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h - * DB16[3:0] DP 1.4a spec, Table 2-120 - */ - vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/ - /* RGB->YCBCR color conversion uses the BT.709 color space. */ - vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + /* DP 1.4a spec, Table 2-120 */ + switch (crtc_state->output_format) { + case INTEL_OUTPUT_FORMAT_YCBCR444: + vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */ + break; + case INTEL_OUTPUT_FORMAT_YCBCR420: + vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */ + break; + case INTEL_OUTPUT_FORMAT_RGB: + default: + /* RGB: DB16[7:4] = 0h */ + break; + } + + switch (conn_state->colorspace) { + case DRM_MODE_COLORIMETRY_BT709_YCC: + vsc_sdp.db[16] |= 0x1; + break; + case DRM_MODE_COLORIMETRY_XVYCC_601: + vsc_sdp.db[16] |= 0x2; + break; + case DRM_MODE_COLORIMETRY_XVYCC_709: + vsc_sdp.db[16] |= 0x3; + break; + case DRM_MODE_COLORIMETRY_SYCC_601: + vsc_sdp.db[16] |= 0x4; + break; + case DRM_MODE_COLORIMETRY_OPYCC_601: + vsc_sdp.db[16] |= 0x5; + break; + case DRM_MODE_COLORIMETRY_BT2020_CYCC: + case DRM_MODE_COLORIMETRY_BT2020_RGB: + vsc_sdp.db[16] |= 0x6; + break; + case DRM_MODE_COLORIMETRY_BT2020_YCC: + vsc_sdp.db[16] |= 0x7; + break; + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: + case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: + vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */ + break; + default: + /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */ + + /* RGB->YCBCR color conversion uses the BT.709 color space. 
*/ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + break; + } /* * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, @@ -4489,13 +4651,106 @@ intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); } -void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) +static void +intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct dp_sdp infoframe_sdp = {}; + struct hdmi_drm_infoframe drm_infoframe = {}; + const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE; + unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE]; + ssize_t len; + int ret; + + ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state); + if (ret) { + DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n"); + return; + } + + len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf)); + if (len < 0) { + DRM_DEBUG_KMS("buffer size is smaller than hdr metadata infoframe\n"); + return; + } + + if (len != infoframe_size) { + DRM_DEBUG_KMS("wrong static hdr metadata size\n"); + return; + } + + /* + * Set up the infoframe sdp packet for HDR static metadata. + * Prepare VSC Header for SU as per DP 1.4a spec, + * Table 2-100 and Table 2-101 + */ + + /* Packet ID, 00h for non-Audio INFOFRAME */ + infoframe_sdp.sdp_header.HB0 = 0; + /* + * Packet Type 80h + Non-audio INFOFRAME Type value + * HDMI_INFOFRAME_TYPE_DRM: 0x87, + */ + infoframe_sdp.sdp_header.HB1 = drm_infoframe.type; + /* + * Least Significant Eight Bits of (Data Byte Count – 1) + * infoframe_size - 1, + */ + infoframe_sdp.sdp_header.HB2 = 0x1D; + /* INFOFRAME SDP Version Number */ + infoframe_sdp.sdp_header.HB3 = (0x13 << 2); + /* CTA Header Byte 2 (INFOFRAME Version Number) */ + infoframe_sdp.db[0] = drm_infoframe.version; + /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ + infoframe_sdp.db[1] = drm_infoframe.length; + /* + * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after + * HDMI_INFOFRAME_HEADER_SIZE + */ + BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2); + memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE], + HDMI_DRM_INFOFRAME_SIZE); + + /* + * Size of DP infoframe sdp packet for HDR static metadata is consist of + * - DP SDP Header(struct dp_sdp_header): 4 bytes + * - Two Data Blocks: 2 bytes + * CTA Header Byte2 (INFOFRAME Version Number) + * CTA Header Byte3 (Length of INFOFRAME) + * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes + * + * Prior to GEN11's GMP register size is identical to DP HDR static metadata + * infoframe size. But GEN11+ has larger than that size, write_infoframe + * will pad rest of the size. 
+ */ + intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state, + HDMI_PACKET_TYPE_GAMUT_METADATA, + &infoframe_sdp, + sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE); +} + +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { - if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) + if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) return; - intel_pixel_encoding_setup_vsc(intel_dp, crtc_state); + intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); +} + +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + if (!conn_state->hdr_output_metadata) + return; + + intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp, + crtc_state, + conn_state); } static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) @@ -5217,6 +5472,9 @@ static bool icl_combo_port_connected(struct drm_i915_private *dev_priv, { enum port port = intel_dig_port->base.port; + if (HAS_PCH_MCC(dev_priv) && port == PORT_C) + return I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); + return I915_READ(SDEISR) & SDE_DDI_HOTPLUG_ICP(port); } @@ -6262,13 +6520,15 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) * would end up in an endless cycle of * "vdd off -> long hpd -> vdd on -> detect -> vdd off -> ..." */ - DRM_DEBUG_KMS("ignoring long hpd on eDP port %c\n", - port_name(intel_dig_port->base.port)); + DRM_DEBUG_KMS("ignoring long hpd on eDP [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); return IRQ_HANDLED; } - DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", - port_name(intel_dig_port->base.port), + DRM_DEBUG_KMS("got hpd irq on [ENCODER:%d:%s] - %s\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name, long_hpd ? "long" : "short"); if (long_hpd) { @@ -6335,6 +6595,13 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect else if (INTEL_GEN(dev_priv) >= 5) drm_connector_attach_max_bpc_property(connector, 6, 12); + intel_attach_colorspace_property(connector); + + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) >= 11) + drm_object_attach_property(&connector->base, + connector->dev->mode_config.hdr_output_metadata_property, + 0); + if (intel_dp_is_edp(intel_dp)) { u32 allowed_scalers; @@ -7132,8 +7399,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_modeset_retry_work_fn); if (WARN(intel_dig_port->max_lanes < 1, - "Not enough lanes (%d) for DP on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for DP on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return false; intel_dp_set_source_rates(intel_dp); @@ -7174,9 +7442,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, port != PORT_B && port != PORT_C)) return false; - DRM_DEBUG_KMS("Adding %s connector on port %c\n", - type == DRM_MODE_CONNECTOR_eDP ? "eDP" : "DP", - port_name(port)); + DRM_DEBUG_KMS("Adding %s connector on [ENCODER:%d:%s]\n", + type == DRM_MODE_CONNECTOR_eDP ? 
"eDP" : "DP", + intel_encoder->base.base.id, intel_encoder->base.name); drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); @@ -7200,11 +7468,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_connector->get_hw_state = intel_connector_get_hw_state; /* init MST on ports that can support it */ - if (HAS_DP_MST(dev_priv) && !intel_dp_is_edp(intel_dp) && - (port == PORT_B || port == PORT_C || - port == PORT_D || port == PORT_F)) - intel_dp_mst_encoder_init(intel_dig_port, - intel_connector->base.base.id); + intel_dp_mst_encoder_init(intel_dig_port, + intel_connector->base.base.id); if (!intel_edp_init_connector(intel_dp, intel_connector)) { intel_dp_aux_fini(intel_dp); @@ -7295,11 +7560,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 0; intel_encoder->port = port; diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 657bbb1f5ed0..3da166054788 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -13,6 +13,7 @@ #include "i915_reg.h" enum pipe; +enum port; struct drm_connector_state; struct drm_encoder; struct drm_i915_private; @@ -102,15 +103,19 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp); bool intel_dp_source_supports_hbr3(struct intel_dp *intel_dp); bool intel_dp_get_link_status(struct intel_dp *intel_dp, u8 *link_status); -u16 intel_dp_dsc_get_output_bpp(int link_clock, u8 lane_count, - int mode_clock, int mode_hdisplay); -u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, - int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_dp_vsc_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); +void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); bool intel_digital_port_connected(struct intel_encoder *encoder); static inline unsigned int intel_dp_unused_lane_mask(int lane_count) @@ -118,4 +123,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +u32 intel_dp_mode_to_fec_clock(u32 mode_clock); + #endif /* __INTEL_DP_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 6df240a01b8c..bbcab27644dc 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -81,7 +81,7 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, adjusted_mode->crtc_clock, crtc_state->port_clock, 
&crtc_state->dp_m_n, - constant_n); + constant_n, crtc_state->fec_enable); crtc_state->dp_m_n.tu = slots; return 0; @@ -215,7 +215,7 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); if (ret) { - DRM_ERROR("failed to update payload %d\n", ret); + DRM_DEBUG_KMS("failed to update payload %d\n", ret); } if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, @@ -295,7 +295,6 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; struct intel_connector *connector = to_intel_connector(conn_state->connector); int ret; @@ -326,8 +325,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_ERROR("failed to allocate vcpi\n"); intel_dp->active_mst_links++; - temp = I915_READ(DP_TP_STATUS(port)); - I915_WRITE(DP_TP_STATUS(port), temp); + temp = I915_READ(intel_dp->regs.dp_tp_status); + I915_WRITE(intel_dp->regs.dp_tp_status, temp); ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); @@ -342,11 +341,10 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, struct intel_digital_port *intel_dig_port = intel_mst->primary; struct intel_dp *intel_dp = &intel_dig_port->dp; struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum port port = intel_dig_port->base.port; DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); - if (intel_de_wait_for_set(dev_priv, DP_TP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, intel_dp->regs.dp_tp_status, DP_TP_STATUS_ACT_SENT, 1)) DRM_ERROR("Timed out waiting for ACT sent\n"); @@ -426,6 +424,7 @@ static enum drm_mode_status intel_dp_mst_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -453,7 +452,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, if (mode_rate > max_rate || mode->clock > max_dotclk) return MODE_CLOCK_HIGH; - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector, @@ -599,6 +598,8 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum struct intel_dp_mst_encoder *intel_mst; struct intel_encoder *intel_encoder; struct drm_device *dev = intel_dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe_iter; intel_mst = kzalloc(sizeof(*intel_mst), GFP_KERNEL); @@ -615,8 +616,9 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->base.port; - intel_encoder->crtc_mask = BIT(pipe); intel_encoder->cloneable = 0; + for_each_pipe(dev_priv, pipe_iter) + intel_encoder->crtc_mask |= BIT(pipe_iter); intel_encoder->compute_config = intel_dp_mst_compute_config; intel_encoder->disable = intel_mst_disable_dp; @@ -653,21 +655,31 @@ intel_dp_mst_encoder_active_links(struct intel_digital_port *intel_dig_port) int intel_dp_mst_encoder_init(struct intel_digital_port 
*intel_dig_port, int conn_base_id) { + struct drm_i915_private *i915 = to_i915(intel_dig_port->base.base.dev); struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_device *dev = intel_dig_port->base.base.dev; + enum port port = intel_dig_port->base.port; int ret; - intel_dp->can_mst = true; + if (!HAS_DP_MST(i915) || intel_dp_is_edp(intel_dp)) + return 0; + + if (INTEL_GEN(i915) < 12 && port == PORT_A) + return 0; + + if (INTEL_GEN(i915) < 11 && port == PORT_E) + return 0; + intel_dp->mst_mgr.cbs = &mst_cbs; /* create encoders */ intel_dp_create_fake_mst_encoders(intel_dig_port); - ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, dev, + ret = drm_dp_mst_topology_mgr_init(&intel_dp->mst_mgr, &i915->drm, &intel_dp->aux, 16, 3, conn_base_id); - if (ret) { - intel_dp->can_mst = false; + if (ret) return ret; - } + + intel_dp->can_mst = true; + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index b8148f838354..ec10fa7d3c69 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -247,8 +247,7 @@ static struct intel_shared_dpll * intel_find_shared_dpll(struct intel_atomic_state *state, const struct intel_crtc *crtc, const struct intel_dpll_hw_state *pll_state, - enum intel_dpll_id range_min, - enum intel_dpll_id range_max) + unsigned long dpll_mask) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_shared_dpll *pll, *unused_pll = NULL; @@ -257,7 +256,9 @@ intel_find_shared_dpll(struct intel_atomic_state *state, shared_dpll = intel_atomic_get_shared_dpll_state(&state->base); - for (i = range_min; i <= range_max; i++) { + WARN_ON(dpll_mask & ~(BIT(I915_NUM_PLLS) - 1)); + + for_each_set_bit(i, &dpll_mask, I915_NUM_PLLS) { pll = &dev_priv->shared_dplls[i]; /* Only want to check enabled timings first */ @@ -464,8 +465,8 @@ static bool ibx_get_dpll(struct intel_atomic_state *state, } else { pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_PCH_PLL_A, - DPLL_ID_PCH_PLL_B); + BIT(DPLL_ID_PCH_PLL_B) | + BIT(DPLL_ID_PCH_PLL_A)); } if (!pll) @@ -814,7 +815,8 @@ hsw_ddi_hdmi_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_WRPLL1, DPLL_ID_WRPLL2); + BIT(DPLL_ID_WRPLL2) | + BIT(DPLL_ID_WRPLL1)); if (!pll) return NULL; @@ -877,7 +879,7 @@ static bool hsw_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SPLL, DPLL_ID_SPLL); + BIT(DPLL_ID_SPLL)); } else { return false; } @@ -1447,13 +1449,13 @@ static bool skl_get_dpll(struct intel_atomic_state *state, if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL0); + BIT(DPLL_ID_SKL_DPLL0)); else pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL1, - DPLL_ID_SKL_DPLL3); + BIT(DPLL_ID_SKL_DPLL3) | + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1)); if (!pll) return false; @@ -2401,8 +2403,9 @@ static bool cnl_get_dpll(struct intel_atomic_state *state, pll = intel_find_shared_dpll(state, crtc, &crtc_state->dpll_hw_state, - DPLL_ID_SKL_DPLL0, - DPLL_ID_SKL_DPLL2); + BIT(DPLL_ID_SKL_DPLL2) | + BIT(DPLL_ID_SKL_DPLL1) | + BIT(DPLL_ID_SKL_DPLL0)); if (!pll) { DRM_DEBUG_KMS("No PLL selected\n"); return false; @@ -2520,6 +2523,18 @@ static const struct skl_wrpll_params 
icl_tbt_pll_19_2MHz_values = { .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0, }; +static const struct skl_wrpll_params tgl_tbt_pll_19_2MHz_values = { + .dco_integer = 0x54, .dco_fraction = 0x3000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + +static const struct skl_wrpll_params tgl_tbt_pll_24MHz_values = { + .dco_integer = 0x43, .dco_fraction = 0x4000, + /* the following params are unused */ + .pdiv = 0, .kdiv = 0, .qdiv_mode = 0, .qdiv_ratio = 0, +}; + static bool icl_calc_dp_combo_pll(struct intel_crtc_state *crtc_state, struct skl_wrpll_params *pll_params) { @@ -2547,8 +2562,34 @@ static bool icl_calc_tbt_pll(struct intel_crtc_state *crtc_state, { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - *pll_params = dev_priv->cdclk.hw.ref == 24000 ? - icl_tbt_pll_24MHz_values : icl_tbt_pll_19_2MHz_values; + if (INTEL_GEN(dev_priv) >= 12) { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = tgl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = tgl_tbt_pll_24MHz_values; + break; + } + } else { + switch (dev_priv->cdclk.hw.ref) { + default: + MISSING_CASE(dev_priv->cdclk.hw.ref); + /* fall-through */ + case 19200: + case 38400: + *pll_params = icl_tbt_pll_19_2MHz_values; + break; + case 24000: + *pll_params = icl_tbt_pll_24MHz_values; + break; + } + } + return true; } @@ -2607,7 +2648,8 @@ enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port) static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, u32 *target_dco_khz, - struct intel_dpll_hw_state *state) + struct intel_dpll_hw_state *state, + bool is_dkl) { u32 dco_min_freq, dco_max_freq; int div1_vals[] = {7, 5, 3, 2}; @@ -2629,8 +2671,13 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, continue; if (div2 >= 2) { + /* + * Note: a_divratio not matching TGL BSpec + * algorithm but matching hardcoded values and + * working on HW for DP alt-mode at least + */ a_divratio = is_dp ? 10 : 5; - tlinedrv = 2; + tlinedrv = is_dkl ? 1 : 2; } else { a_divratio = 5; tlinedrv = 0; @@ -2693,11 +2740,12 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, u64 tmp; bool use_ssc = false; bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + bool is_dkl = INTEL_GEN(dev_priv) >= 12; memset(pll_state, 0, sizeof(*pll_state)); if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, - pll_state)) { + pll_state, is_dkl)) { DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); return false; } @@ -2705,8 +2753,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, m1div = 2; m2div_int = dco_khz / (refclk_khz * m1div); if (m2div_int > 255) { - m1div = 4; - m2div_int = dco_khz / (refclk_khz * m1div); + if (!is_dkl) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + } + if (m2div_int > 255) { DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", clock); @@ -2786,60 +2837,94 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, } ssc_steplog = 4; - pll_state->mg_pll_div0 = (m2div_rem > 0 ? 
MG_PLL_DIV0_FRACNEN_H : 0) | - MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | - MG_PLL_DIV0_FBDIV_INT(m2div_int); - - pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | - MG_PLL_DIV1_DITHER_DIV_2 | - MG_PLL_DIV1_NDIVRATIO(1) | - MG_PLL_DIV1_FBPREDIV(m1div); - - pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | - MG_PLL_LF_AFCCNTSEL_512 | - MG_PLL_LF_GAINCTRL(1) | - MG_PLL_LF_INT_COEFF(int_coeff) | - MG_PLL_LF_PROP_COEFF(prop_coeff); - - pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | - MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | - MG_PLL_FRAC_LOCK_DCODITHEREN | - MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); - if (use_ssc || m2div_rem > 0) - pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; - - pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | - MG_PLL_SSC_TYPE(2) | - MG_PLL_SSC_STEPLENGTH(ssc_steplen) | - MG_PLL_SSC_STEPNUM(ssc_steplog) | - MG_PLL_SSC_FLLEN | - MG_PLL_SSC_STEPSIZE(ssc_stepsize); - - pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART | - MG_PLL_TDC_COLDST_IREFINT_EN | - MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | - MG_PLL_TDC_TDCOVCCORR_EN | - MG_PLL_TDC_TDCSEL(3); - - pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | - MG_PLL_BIAS_INIT_DCOAMP(0x3F) | - MG_PLL_BIAS_BIAS_BONUS(10) | - MG_PLL_BIAS_BIASCAL_EN | - MG_PLL_BIAS_CTRIM(12) | - MG_PLL_BIAS_VREF_RDAC(4) | - MG_PLL_BIAS_IREFTRIM(iref_trim); - - if (refclk_khz == 38400) { - pll_state->mg_pll_tdc_coldst_bias_mask = MG_PLL_TDC_COLDST_COLDSTART; - pll_state->mg_pll_bias_mask = 0; + /* write pll_state calculations */ + if (is_dkl) { + pll_state->mg_pll_div0 = DKL_PLL_DIV0_INTEG_COEFF(int_coeff) | + DKL_PLL_DIV0_PROP_COEFF(prop_coeff) | + DKL_PLL_DIV0_FBPREDIV(m1div) | + DKL_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = DKL_PLL_DIV1_IREF_TRIM(iref_trim) | + DKL_PLL_DIV1_TDC_TARGET_CNT(tdc_targetcnt); + + pll_state->mg_pll_ssc = DKL_PLL_SSC_IREF_NDIV_RATIO(iref_ndiv) | + DKL_PLL_SSC_STEP_LEN(ssc_steplen) | + DKL_PLL_SSC_STEP_NUM(ssc_steplog) | + (use_ssc ? DKL_PLL_SSC_EN : 0); + + pll_state->mg_pll_bias = (m2div_frac ? DKL_PLL_BIAS_FRAC_EN_H : 0) | + DKL_PLL_BIAS_FBDIV_FRAC(m2div_frac); + + pll_state->mg_pll_tdc_coldst_bias = + DKL_PLL_TDC_SSC_STEP_SIZE(ssc_stepsize) | + DKL_PLL_TDC_FEED_FWD_GAIN(feedfwgain); + } else { - pll_state->mg_pll_tdc_coldst_bias_mask = -1U; - pll_state->mg_pll_bias_mask = -1U; - } + pll_state->mg_pll_div0 = + (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = + MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = + MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = + MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= + MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = + (use_ssc ? 
MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = + MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + + if (refclk_khz == 38400) { + pll_state->mg_pll_tdc_coldst_bias_mask = + MG_PLL_TDC_COLDST_COLDSTART; + pll_state->mg_pll_bias_mask = 0; + } else { + pll_state->mg_pll_tdc_coldst_bias_mask = -1U; + pll_state->mg_pll_bias_mask = -1U; + } - pll_state->mg_pll_tdc_coldst_bias &= pll_state->mg_pll_tdc_coldst_bias_mask; - pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + pll_state->mg_pll_tdc_coldst_bias &= + pll_state->mg_pll_tdc_coldst_bias_mask; + pll_state->mg_pll_bias &= pll_state->mg_pll_bias_mask; + } return true; } @@ -2893,7 +2978,7 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, &crtc_state->icl_port_dplls[ICL_PORT_DPLL_DEFAULT]; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum port port = encoder->port; - bool has_dpll4 = false; + unsigned long dpll_mask; if (!icl_calc_dpll_state(crtc_state, encoder, &port_dpll->hw_state)) { DRM_DEBUG_KMS("Could not calculate combo PHY PLL state.\n"); @@ -2902,16 +2987,19 @@ static bool icl_get_combo_phy_dpll(struct intel_atomic_state *state, } if (IS_ELKHARTLAKE(dev_priv) && port != PORT_A) - has_dpll4 = true; + dpll_mask = + BIT(DPLL_ID_EHL_DPLL4) | + BIT(DPLL_ID_ICL_DPLL1) | + BIT(DPLL_ID_ICL_DPLL0); + else + dpll_mask = BIT(DPLL_ID_ICL_DPLL1) | BIT(DPLL_ID_ICL_DPLL0); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_DPLL0, - has_dpll4 ? 
DPLL_ID_EHL_DPLL4 - : DPLL_ID_ICL_DPLL1); + dpll_mask); if (!port_dpll->pll) { - DRM_DEBUG_KMS("No combo PHY PLL found for port %c\n", - port_name(encoder->port)); + DRM_DEBUG_KMS("No combo PHY PLL found for [ENCODER:%d:%s]\n", + encoder->base.base.id, encoder->base.name); return false; } @@ -2941,8 +3029,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - DPLL_ID_ICL_TBTPLL, - DPLL_ID_ICL_TBTPLL); + BIT(DPLL_ID_ICL_TBTPLL)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No TBT-ALT PLL found\n"); return false; @@ -2961,8 +3048,7 @@ static bool icl_get_tc_phy_dplls(struct intel_atomic_state *state, encoder->port)); port_dpll->pll = intel_find_shared_dpll(state, crtc, &port_dpll->hw_state, - dpll_id, - dpll_id); + BIT(dpll_id)); if (!port_dpll->pll) { DRM_DEBUG_KMS("No MG PHY PLL found\n"); goto err_unreference_tbt_pll; @@ -3086,6 +3172,78 @@ out: return ret; } +static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + enum tc_port tc_port = icl_pll_id_to_tc_port(id); + intel_wakeref_t wakeref; + bool ret = false; + u32 val; + + wakeref = intel_display_power_get_if_enabled(dev_priv, + POWER_DOMAIN_DISPLAY_CORE); + if (!wakeref) + return false; + + val = I915_READ(MG_PLL_ENABLE(tc_port)); + if (!(val & PLL_ENABLE)) + goto out; + + /* + * All registers read here have the same HIP_INDEX_REG even though + * they are on different building blocks + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + hw_state->mg_refclkin_ctl = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; + + hw_state->mg_clktop2_hsclkctl = + I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + hw_state->mg_clktop2_hsclkctl &= + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; + + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + hw_state->mg_clktop2_coreclkctl1 &= + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + + hw_state->mg_pll_div0 = I915_READ(DKL_PLL_DIV0(tc_port)); + hw_state->mg_pll_div0 &= (DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + + hw_state->mg_pll_div1 = I915_READ(DKL_PLL_DIV1(tc_port)); + hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + + hw_state->mg_pll_ssc = I915_READ(DKL_PLL_SSC(tc_port)); + hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + + hw_state->mg_pll_bias = I915_READ(DKL_PLL_BIAS(tc_port)); + hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); + return ret; +} + static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state, @@ -3220,6 +3378,75 @@ static void icl_mg_pll_write(struct drm_i915_private *dev_priv, POSTING_READ(MG_PLL_TDC_COLDST_BIAS(tc_port)); } +static 
void dkl_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum tc_port tc_port = icl_pll_id_to_tc_port(pll->info->id); + u32 val; + + /* + * All registers programmed here have the same HIP_INDEX_REG even + * though on different building block + */ + I915_WRITE(HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, 0x2)); + + /* All the registers are RMW */ + val = I915_READ(DKL_REFCLKIN_CTL(tc_port)); + val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; + val |= hw_state->mg_refclkin_ctl; + I915_WRITE(DKL_REFCLKIN_CTL(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_CORECLKCTL1(tc_port)); + val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; + val |= hw_state->mg_clktop2_coreclkctl1; + I915_WRITE(DKL_CLKTOP2_CORECLKCTL1(tc_port), val); + + val = I915_READ(DKL_CLKTOP2_HSCLKCTL(tc_port)); + val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); + val |= hw_state->mg_clktop2_hsclkctl; + I915_WRITE(DKL_CLKTOP2_HSCLKCTL(tc_port), val); + + val = I915_READ(DKL_PLL_DIV0(tc_port)); + val &= ~(DKL_PLL_DIV0_INTEG_COEFF_MASK | + DKL_PLL_DIV0_PROP_COEFF_MASK | + DKL_PLL_DIV0_FBPREDIV_MASK | + DKL_PLL_DIV0_FBDIV_INT_MASK); + val |= hw_state->mg_pll_div0; + I915_WRITE(DKL_PLL_DIV0(tc_port), val); + + val = I915_READ(DKL_PLL_DIV1(tc_port)); + val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK | + DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); + val |= hw_state->mg_pll_div1; + I915_WRITE(DKL_PLL_DIV1(tc_port), val); + + val = I915_READ(DKL_PLL_SSC(tc_port)); + val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | + DKL_PLL_SSC_STEP_LEN_MASK | + DKL_PLL_SSC_STEP_NUM_MASK | + DKL_PLL_SSC_EN); + val |= hw_state->mg_pll_ssc; + I915_WRITE(DKL_PLL_SSC(tc_port), val); + + val = I915_READ(DKL_PLL_BIAS(tc_port)); + val &= ~(DKL_PLL_BIAS_FRAC_EN_H | + DKL_PLL_BIAS_FBDIV_FRAC_MASK); + val |= hw_state->mg_pll_bias; + I915_WRITE(DKL_PLL_BIAS(tc_port), val); + + val = I915_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); + val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK | + DKL_PLL_TDC_FEED_FWD_GAIN_MASK); + val |= hw_state->mg_pll_tdc_coldst_bias; + I915_WRITE(DKL_PLL_TDC_COLDST_BIAS(tc_port), val); + + POSTING_READ(DKL_PLL_TDC_COLDST_BIAS(tc_port)); +} + static void icl_pll_power_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, i915_reg_t enable_reg) @@ -3312,7 +3539,10 @@ static void mg_pll_enable(struct drm_i915_private *dev_priv, icl_pll_power_enable(dev_priv, pll, enable_reg); - icl_mg_pll_write(dev_priv, pll); + if (INTEL_GEN(dev_priv) >= 12) + dkl_pll_write(dev_priv, pll); + else + icl_mg_pll_write(dev_priv, pll); /* * DVFS pre sequence would be here, but in our driver the cdclk code @@ -3467,11 +3697,22 @@ static const struct intel_dpll_mgr ehl_pll_mgr = { .dump_hw_state = icl_dump_hw_state, }; +static const struct intel_shared_dpll_funcs dkl_pll_funcs = { + .enable = mg_pll_enable, + .disable = mg_pll_disable, + .get_hw_state = dkl_pll_get_hw_state, +}; + static const struct dpll_info tgl_plls[] = { { "DPLL 0", &combo_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, { "DPLL 1", &combo_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, { "TBT PLL", &tbt_pll_funcs, DPLL_ID_ICL_TBTPLL, 0 }, - /* TODO: Add typeC plls */ + { "TC PLL 1", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "TC PLL 2", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "TC PLL 3", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "TC PLL 4", &dkl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { "TC PLL 5", &dkl_pll_funcs, 
DPLL_ID_TGL_MGPLL5, 0 }, + { "TC PLL 6", &dkl_pll_funcs, DPLL_ID_TGL_MGPLL6, 0 }, { }, }; @@ -3479,6 +3720,7 @@ static const struct intel_dpll_mgr tgl_pll_mgr = { .dpll_info = tgl_plls, .get_dplls = icl_get_dplls, .put_dplls = icl_put_dplls, + .update_active_dpll = icl_update_active_dpll, .dump_hw_state = icl_dump_hw_state, }; diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c new file mode 100644 index 000000000000..bb5a0e91b370 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + * + */ + +#include "i915_drv.h" +#include "intel_display_types.h" + +#define DSB_BUF_SIZE (2 * PAGE_SIZE) + +/** + * DOC: DSB + * + * A DSB (Display State Buffer) is a queue of MMIO instructions in the memory + * which can be offloaded to DSB HW in Display Controller. DSB HW is a DMA + * engine that can be programmed to download the DSB from memory. + * It allows driver to batch submit display HW programming. This helps to + * reduce loading time and CPU activity, thereby making the context switch + * faster. DSB Support added from Gen12 Intel graphics based platform. + * + * DSB's can access only the pipe, plane, and transcoder Data Island Packet + * registers. + * + * DSB HW can support only register writes (both indexed and direct MMIO + * writes). There are no registers reads possible with DSB HW engine. + */ + +/* DSB opcodes. */ +#define DSB_OPCODE_SHIFT 24 +#define DSB_OPCODE_MMIO_WRITE 0x1 +#define DSB_OPCODE_INDEXED_WRITE 0x9 +#define DSB_BYTE_EN 0xF +#define DSB_BYTE_EN_SHIFT 20 +#define DSB_REG_VALUE_MASK 0xfffff + +static inline bool is_dsb_busy(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + return DSB_STATUS & I915_READ(DSB_CTRL(pipe, dsb->id)); +} + +static inline bool intel_dsb_enable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl |= DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + +static inline bool intel_dsb_disable_engine(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + u32 dsb_ctrl; + + dsb_ctrl = I915_READ(DSB_CTRL(pipe, dsb->id)); + if (DSB_STATUS & dsb_ctrl) { + DRM_DEBUG_KMS("DSB engine is busy.\n"); + return false; + } + + dsb_ctrl &= ~DSB_ENABLE; + I915_WRITE(DSB_CTRL(pipe, dsb->id), dsb_ctrl); + + POSTING_READ(DSB_CTRL(pipe, dsb->id)); + return true; +} + +/** + * intel_dsb_get() - Allocate DSB context and return a DSB instance. + * @crtc: intel_crtc structure to get pipe info. + * + * This function provides handle of a DSB instance, for the further DSB + * operations. + * + * Returns: address of Intel_dsb instance requested for. + * Failure: Returns the same DSB instance, but without a command buffer. 
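 *
 * For illustration only (not part of the API contract), a typical caller
 * batches its writes and then commits; PLACEHOLDER_DATA_REG stands in for
 * a real auto-increment data register and lut[]/n are assumed to exist:
 *
 *	struct intel_dsb *dsb = intel_dsb_get(crtc);
 *	int i;
 *
 *	for (i = 0; i < n; i++)
 *		intel_dsb_indexed_reg_write(dsb, PLACEHOLDER_DATA_REG(pipe), lut[i]);
 *	intel_dsb_commit(dsb);
 *	intel_dsb_put(dsb);
 *
 * The write helpers fall back to plain I915_WRITE() when no command
 * buffer could be allocated, so the caller needs no error handling here.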
+ */ + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *i915 = to_i915(dev); + struct intel_dsb *dsb = &crtc->dsb; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + intel_wakeref_t wakeref; + + if (!HAS_DSB(i915)) + return dsb; + + if (atomic_add_return(1, &dsb->refcount) != 1) + return dsb; + + dsb->id = DSB1; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + obj = i915_gem_object_create_internal(i915, DSB_BUF_SIZE); + if (IS_ERR(obj)) { + DRM_ERROR("Gem object creation failed\n"); + goto err; + } + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + DRM_ERROR("Vma creation failed\n"); + i915_gem_object_put(obj); + atomic_dec(&dsb->refcount); + goto err; + } + + dsb->cmd_buf = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); + if (IS_ERR(dsb->cmd_buf)) { + DRM_ERROR("Command buffer creation failed\n"); + i915_vma_unpin_and_release(&vma, 0); + dsb->cmd_buf = NULL; + atomic_dec(&dsb->refcount); + goto err; + } + dsb->vma = vma; + +err: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + return dsb; +} + +/** + * intel_dsb_put() - To destroy DSB context. + * @dsb: intel_dsb structure. + * + * This function destroys the DSB context allocated by a dsb_get(), by + * unpinning and releasing the VMA object associated with it. + */ + +void intel_dsb_put(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *i915 = to_i915(crtc->base.dev); + + if (!HAS_DSB(i915)) + return; + + if (WARN_ON(atomic_read(&dsb->refcount) == 0)) + return; + + if (atomic_dec_and_test(&dsb->refcount)) { + i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); + dsb->cmd_buf = NULL; + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + } +} + +/** + * intel_dsb_indexed_reg_write() -Write to the DSB context for auto + * increment register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB for auto-increment register. During command buffer overflow, + * a warning is thrown and rest all erroneous condition register programming + * is done through mmio write. + */ + +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + u32 reg_val; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; + } + + /* + * For example the buffer will look like below for 3 dwords for auto + * increment register: + * +--------------------------------------------------------+ + * | size = 3 | offset &| value1 | value2 | value3 | zero | + * | | opcode | | | | | + * +--------------------------------------------------------+ + * + + + + + + + + * 0 4 8 12 16 20 24 + * Byte + * + * As every instruction is 8 byte aligned the index of dsb instruction + * will start always from even number while dealing with u32 array. If + * we are writing odd no of dwords, Zeros will be added in the end for + * padding. + */ + reg_val = buf[dsb->ins_start_offset + 1] & DSB_REG_VALUE_MASK; + if (reg_val != i915_mmio_reg_offset(reg)) { + /* Every instruction should be 8 byte aligned. 
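 * As an illustrative aside (the 0x1000 offset and the values are
 * arbitrary), three intel_dsb_indexed_reg_write() calls for one
 * auto-increment register leave the dword buffer holding:
 *	buf[0] = 3			count of data dwords so far
 *	buf[1] = (0x9 << 24) | 0x1000	indexed-write opcode | register offset
 *	buf[2..4] = the three values
 *	buf[5] = 0			padding so the next instruction starts
 *					on an even dword, i.e. 8-byte, boundary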
*/ + dsb->free_pos = ALIGN(dsb->free_pos, 2); + + dsb->ins_start_offset = dsb->free_pos; + + /* Update the size. */ + buf[dsb->free_pos++] = 1; + + /* Update the opcode and reg. */ + buf[dsb->free_pos++] = (DSB_OPCODE_INDEXED_WRITE << + DSB_OPCODE_SHIFT) | + i915_mmio_reg_offset(reg); + + /* Update the value. */ + buf[dsb->free_pos++] = val; + } else { + /* Update the new value. */ + buf[dsb->free_pos++] = val; + + /* Update the size. */ + buf[dsb->ins_start_offset]++; + } + + /* if number of data words is odd, then the last dword should be 0.*/ + if (dsb->free_pos & 0x1) + buf[dsb->free_pos] = 0; +} + +/** + * intel_dsb_reg_write() -Write to the DSB context for normal + * register. + * @dsb: intel_dsb structure. + * @reg: register address. + * @val: value. + * + * This function is used for writing register-value pair in command + * buffer of DSB. During command buffer overflow, a warning is thrown + * and rest all erroneous condition register programming is done + * through mmio write. + */ +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 *buf = dsb->cmd_buf; + + if (!buf) { + I915_WRITE(reg, val); + return; + } + + if (WARN_ON(dsb->free_pos >= DSB_BUF_SIZE)) { + DRM_DEBUG_KMS("DSB buffer overflow\n"); + return; + } + + dsb->ins_start_offset = dsb->free_pos; + buf[dsb->free_pos++] = val; + buf[dsb->free_pos++] = (DSB_OPCODE_MMIO_WRITE << DSB_OPCODE_SHIFT) | + (DSB_BYTE_EN << DSB_BYTE_EN_SHIFT) | + i915_mmio_reg_offset(reg); +} + +/** + * intel_dsb_commit() - Trigger workload execution of DSB. + * @dsb: intel_dsb structure. + * + * This function is used to do actual write to hardware using DSB. + * On errors, fall back to MMIO. Also this function help to reset the context. 
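 *
 * For illustration, assuming the usual 64-byte CACHELINE_BYTES: if ten
 * dwords (40 bytes) were queued, the tail is rounded up to 64 and the
 * trailing 24 bytes are zeroed before DSB_TAIL is written, so the engine
 * never executes stale data beyond the last queued instruction.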
+ */ +void intel_dsb_commit(struct intel_dsb *dsb) +{ + struct intel_crtc *crtc = container_of(dsb, typeof(*crtc), dsb); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe = crtc->pipe; + u32 tail; + + if (!dsb->free_pos) + return; + + if (!intel_dsb_enable_engine(dsb)) + goto reset; + + if (is_dsb_busy(dsb)) { + DRM_ERROR("HEAD_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + I915_WRITE(DSB_HEAD(pipe, dsb->id), i915_ggtt_offset(dsb->vma)); + + tail = ALIGN(dsb->free_pos * 4, CACHELINE_BYTES); + if (tail > dsb->free_pos * 4) + memset(&dsb->cmd_buf[dsb->free_pos], 0, + (tail - dsb->free_pos * 4)); + + if (is_dsb_busy(dsb)) { + DRM_ERROR("TAIL_PTR write failed - dsb engine is busy.\n"); + goto reset; + } + DRM_DEBUG_KMS("DSB execution started - head 0x%x, tail 0x%x\n", + i915_ggtt_offset(dsb->vma), tail); + I915_WRITE(DSB_TAIL(pipe, dsb->id), i915_ggtt_offset(dsb->vma) + tail); + if (wait_for(!is_dsb_busy(dsb), 1)) { + DRM_ERROR("Timed out waiting for DSB workload completion.\n"); + goto reset; + } + +reset: + dsb->free_pos = 0; + dsb->ins_start_offset = 0; + intel_dsb_disable_engine(dsb); +} diff --git a/drivers/gpu/drm/i915/display/intel_dsb.h b/drivers/gpu/drm/i915/display/intel_dsb.h new file mode 100644 index 000000000000..6f95c8e909e6 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dsb.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef _INTEL_DSB_H +#define _INTEL_DSB_H + +#include <linux/types.h> + +#include "i915_reg.h" + +struct intel_crtc; +struct i915_vma; + +enum dsb_id { + INVALID_DSB = -1, + DSB1, + DSB2, + DSB3, + MAX_DSB_PER_PIPE +}; + +struct intel_dsb { + atomic_t refcount; + enum dsb_id id; + u32 *cmd_buf; + struct i915_vma *vma; + + /* + * free_pos will point the first free entry position + * and help in calculating tail of command buffer. + */ + int free_pos; + + /* + * ins_start_offset will help to store start address of the dsb + * instuction and help in identifying the batch of auto-increment + * register. 
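 *
 * In other words: while free_pos counts the dwords already used in
 * cmd_buf, ins_start_offset points at the first dword of the most
 * recently emitted instruction, so consecutive writes to the same
 * auto-increment register can be folded into one indexed-write
 * instruction by appending the value and bumping the count dword.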
+ */ + u32 ins_start_offset; +}; + +struct intel_dsb * +intel_dsb_get(struct intel_crtc *crtc); +void intel_dsb_put(struct intel_dsb *dsb); +void intel_dsb_reg_write(struct intel_dsb *dsb, i915_reg_t reg, u32 val); +void intel_dsb_indexed_reg_write(struct intel_dsb *dsb, i915_reg_t reg, + u32 val); +void intel_dsb_commit(struct intel_dsb *dsb); + +#endif diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c index 5fec02aceaed..a2a937109a5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi.c +++ b/drivers/gpu/drm/i915/display/intel_dsi.c @@ -55,6 +55,7 @@ int intel_dsi_get_modes(struct drm_connector *connector) enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_connector *intel_connector = to_intel_connector(connector); const struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; @@ -73,7 +74,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; } - return MODE_OK; + return intel_mode_valid_max_plane_size(dev_priv, mode); } struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi, diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c index 93baf366692e..9827f99491d1 100644 --- a/drivers/gpu/drm/i915/display/intel_dvo.c +++ b/drivers/gpu/drm/i915/display/intel_dvo.c @@ -280,7 +280,7 @@ static void intel_dvo_pre_enable(struct intel_encoder *encoder, struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; struct intel_dvo *intel_dvo = enc_to_dvo(encoder); - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 dvo_val; i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg; i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg; @@ -505,7 +505,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_DVO; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); switch (dvo->type) { case INTEL_DVO_CHIP_TMDS: diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 16ed44bfd734..3111ecaeabd0 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -343,8 +343,8 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) HSW_FBCQ_DIS); } - if (IS_GEN(dev_priv, 11)) - /* Wa_1409120013:icl,ehl */ + if (INTEL_GEN(dev_priv) >= 11) + /* Wa_1409120013:icl,ehl,tgl */ I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL); I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -1320,6 +1320,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->enabled = false; fbc->active = false; + if (!drm_mm_initialized(&dev_priv->mm.stolen)) + mkwrite_device_info(dev_priv)->display.has_fbc = false; + if (need_fbc_vtd_wa(dev_priv)) mkwrite_device_info(dev_priv)->display.has_fbc = false; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index d59eee5c5d9c..3d1061470e76 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -141,10 +141,10 @@ static int intelfb_alloc(struct drm_fb_helper 
*helper, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ - obj = NULL; + obj = ERR_PTR(-ENODEV); if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_shmem(dev_priv, size); if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); @@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - mutex_lock(&dev->struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); /* Pin the GGTT vma for our access via info->screen_base. @@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma_flags = flags; intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -274,7 +272,6 @@ out_unpin: intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) drm_fb_helper_fini(&ifbdev->helper); - if (ifbdev->vma) { - mutex_lock(&ifbdev->helper.dev->struct_mutex); + if (ifbdev->vma) intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); - mutex_unlock(&ifbdev->helper.dev->struct_mutex); - } if (ifbdev->fb) drm_framebuffer_remove(&ifbdev->fb->base); @@ -444,7 +438,7 @@ int intel_fbdev_init(struct drm_device *dev) struct intel_fbdev *ifbdev; int ret; - if (WARN_ON(!HAS_DISPLAY(dev_priv))) + if (WARN_ON(!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv))) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 719379774fa5..84b164f31895 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref) return 0; } +__i915_active_call static void frontbuffer_retire(struct i915_active *ref) { struct intel_frontbuffer *front = @@ -220,11 +221,18 @@ static void frontbuffer_release(struct kref *ref) { struct intel_frontbuffer *front = container_of(ref, typeof(*front), ref); + struct drm_i915_gem_object *obj = front->obj; + struct i915_vma *vma; - front->obj->frontbuffer = NULL; - spin_unlock(&to_i915(front->obj->base.dev)->fb_tracking.lock); + spin_lock(&obj->vma.lock); + for_each_ggtt_vma(vma, obj) + vma->display_alignment = I915_GTT_MIN_ALIGNMENT; + spin_unlock(&obj->vma.lock); - i915_gem_object_put(front->obj); + obj->frontbuffer = NULL; + spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock); + + i915_gem_object_put(obj); kfree(front); } @@ -249,8 +257,9 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); - i915_active_init(i915, &front->write, - frontbuffer_active, frontbuffer_retire); + i915_active_init(&front->write, + frontbuffer_active, + i915_active_may_sleep(frontbuffer_retire)); spin_lock(&i915->fb_tracking.lock); if (obj->frontbuffer) { diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c index d6775a005726..3d4d19ac1d14 100644 --- a/drivers/gpu/drm/i915/display/intel_gmbus.c +++ b/drivers/gpu/drm/i915/display/intel_gmbus.c @@ -836,7 +836,7 @@ int 
intel_gmbus_setup(struct drm_i915_private *dev_priv) unsigned int pin; int ret; - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(dev_priv) || !INTEL_DISPLAY_ENABLED(dev_priv)) return 0; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 6ec5ceeab601..e69fa34528df 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -1,9 +1,11 @@ /* SPDX-License-Identifier: MIT */ /* * Copyright (C) 2017 Google, Inc. + * Copyright _ 2017-2019, Intel Corporation. * * Authors: * Sean Paul <seanpaul@chromium.org> + * Ramalingam C <ramalingam.c@intel.com> */ #include <linux/component.h> @@ -18,6 +20,7 @@ #include "intel_display_types.h" #include "intel_hdcp.h" #include "intel_sideband.h" +#include "intel_connector.h" #define KEY_LOAD_TRIES 5 #define ENCRYPT_STATUS_CHANGE_TIMEOUT_MS 50 @@ -105,24 +108,20 @@ bool intel_hdcp2_capable(struct intel_connector *connector) return capable; } -static inline bool intel_hdcp_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(PORT_HDCP_STATUS(port)); - return reg & HDCP_STATUS_ENC; + return I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & + HDCP_STATUS_ENC; } -static inline bool intel_hdcp2_in_use(struct intel_connector *connector) +static inline +bool intel_hdcp2_in_use(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - struct drm_i915_private *dev_priv = to_i915(connector->base.dev); - enum port port = connector->encoder->port; - u32 reg; - - reg = I915_READ(HDCP2_STATUS_DDI(port)); - return reg & LINK_ENCRYPTION_STATUS; + return I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS; } static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, @@ -253,9 +252,29 @@ static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) } static -u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) +u32 intel_hdcp_get_repeater_ctl(struct drm_i915_private *dev_priv, + enum transcoder cpu_transcoder, enum port port) { - enum port port = intel_dig_port->base.port; + if (INTEL_GEN(dev_priv) >= 12) { + switch (cpu_transcoder) { + case TRANSCODER_A: + return HDCP_TRANSA_REP_PRESENT | + HDCP_TRANSA_SHA1_M0; + case TRANSCODER_B: + return HDCP_TRANSB_REP_PRESENT | + HDCP_TRANSB_SHA1_M0; + case TRANSCODER_C: + return HDCP_TRANSC_REP_PRESENT | + HDCP_TRANSC_SHA1_M0; + case TRANSCODER_D: + return HDCP_TRANSD_REP_PRESENT | + HDCP_TRANSD_SHA1_M0; + default: + DRM_ERROR("Unknown transcoder %d\n", cpu_transcoder); + return -EINVAL; + } + } + switch (port) { case PORT_A: return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0; @@ -268,18 +287,20 @@ u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) case PORT_E: return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0; default: - break; + DRM_ERROR("Unknown port %d\n", port); + return -EINVAL; } - DRM_ERROR("Unknown port %d\n", port); - return -EINVAL; } static -int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, +int intel_hdcp_validate_v_prime(struct intel_connector *connector, const struct intel_hdcp_shim *shim, u8 *ksv_fifo, u8 num_downstream, u8 *bstatus) { + struct 
intel_digital_port *intel_dig_port = conn_to_dig_port(connector); struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; + enum port port = intel_dig_port->base.port; u32 vprime, sha_text, sha_leftovers, rep_ctl; int ret, i, j, sha_idx; @@ -306,7 +327,7 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, sha_idx = 0; sha_text = 0; sha_leftovers = 0; - rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port); + rep_ctl = intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, port); I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); for (i = 0; i < num_downstream; i++) { unsigned int sha_empty; @@ -548,7 +569,7 @@ int intel_hdcp_auth_downstream(struct intel_connector *connector) * V prime atleast twice. */ for (i = 0; i < tries; i++) { - ret = intel_hdcp_validate_v_prime(intel_dig_port, shim, + ret = intel_hdcp_validate_v_prime(connector, shim, ksv_fifo, num_downstream, bstatus); if (!ret) @@ -576,6 +597,7 @@ static int intel_hdcp_auth(struct intel_connector *connector) struct drm_device *dev = connector->base.dev; const struct intel_hdcp_shim *shim = hdcp->shim; struct drm_i915_private *dev_priv; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; enum port port; unsigned long r0_prime_gen_start; int ret, i, tries = 2; @@ -615,18 +637,21 @@ static int intel_hdcp_auth(struct intel_connector *connector) /* Initialize An with 2 random values and acquire it */ for (i = 0; i < 2; i++) - I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32()); - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN); + I915_WRITE(HDCP_ANINIT(dev_priv, cpu_transcoder, port), + get_random_u32()); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_CAPTURE_AN); /* Wait for An to be acquired */ - if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), HDCP_STATUS_AN_READY, 1)) { DRM_ERROR("Timed out waiting for An\n"); return -ETIMEDOUT; } - an.reg[0] = I915_READ(PORT_HDCP_ANLO(port)); - an.reg[1] = I915_READ(PORT_HDCP_ANHI(port)); + an.reg[0] = I915_READ(HDCP_ANLO(dev_priv, cpu_transcoder, port)); + an.reg[1] = I915_READ(HDCP_ANHI(dev_priv, cpu_transcoder, port)); ret = shim->write_an_aksv(intel_dig_port, an.shim); if (ret) return ret; @@ -644,24 +669,26 @@ static int intel_hdcp_auth(struct intel_connector *connector) return -EPERM; } - I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]); - I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]); + I915_WRITE(HDCP_BKSVLO(dev_priv, cpu_transcoder, port), bksv.reg[0]); + I915_WRITE(HDCP_BKSVHI(dev_priv, cpu_transcoder, port), bksv.reg[1]); ret = shim->repeater_present(intel_dig_port, &repeater_present); if (ret) return ret; if (repeater_present) I915_WRITE(HDCP_REP_CTL, - intel_hdcp_get_repeater_ctl(intel_dig_port)); + intel_hdcp_get_repeater_ctl(dev_priv, cpu_transcoder, + port)); ret = shim->toggle_signalling(intel_dig_port, true); if (ret) return ret; - I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC); + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), + HDCP_CONF_AUTH_AND_ENC); /* Wait for R0 ready */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Timed out waiting for R0 ready\n"); return -ETIMEDOUT; @@ -689,22 +716,25 @@ static int intel_hdcp_auth(struct intel_connector *connector) ret = shim->read_ri_prime(intel_dig_port, ri.shim); if (ret) 
return ret; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (!wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) break; } if (i == tries) { DRM_DEBUG_KMS("Timed out waiting for Ri prime match (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return -ETIMEDOUT; } /* Wait for encryption confirmation */ - if (intel_de_wait_for_set(dev_priv, PORT_HDCP_STATUS(port), + if (intel_de_wait_for_set(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), HDCP_STATUS_ENC, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Timed out waiting for encryption\n"); @@ -729,15 +759,17 @@ static int _intel_hdcp_disable(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; DRM_DEBUG_KMS("[%s:%d] HDCP is being disabled...\n", connector->base.name, connector->base.base.id); hdcp->hdcp_encrypted = false; - I915_WRITE(PORT_HDCP_CONF(port), 0); - if (intel_de_wait_for_clear(dev_priv, PORT_HDCP_STATUS(port), ~0, - ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { + I915_WRITE(HDCP_CONF(dev_priv, cpu_transcoder, port), 0); + if (intel_de_wait_for_clear(dev_priv, + HDCP_STATUS(dev_priv, cpu_transcoder, port), + ~0, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS)) { DRM_ERROR("Failed to disable HDCP, timeout clearing status\n"); return -ETIMEDOUT; } @@ -808,9 +840,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = connector->base.dev->dev_private; struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* Check_link valid only when HDCP1.4 is enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -819,10 +853,11 @@ static int intel_hdcp_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp_in_use(connector))) { + if (WARN_ON(!intel_hdcp_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("%s:%d HDCP link stopped encryption,%x\n", connector->base.name, connector->base.base.id, - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); @@ -1493,10 +1528,11 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS); - + WARN_ON(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS); if (hdcp->shim->toggle_signalling) { ret = hdcp->shim->toggle_signalling(intel_dig_port, true); if (ret) { @@ -1506,14 +1542,18 @@ static int hdcp2_enable_encryption(struct intel_connector *connector) } } - if (I915_READ(HDCP2_STATUS_DDI(port)) & LINK_AUTH_STATUS) { + if (I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + 
LINK_AUTH_STATUS) { /* Link is Authenticated. Now set for Encryption */ - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) | + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, + port)) | CTL_LINK_ENCRYPTION_REQ); } - ret = intel_de_wait_for_set(dev_priv, HDCP2_STATUS_DDI(port), + ret = intel_de_wait_for_set(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), LINK_ENCRYPTION_STATUS, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); @@ -1526,14 +1566,19 @@ static int hdcp2_disable_encryption(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder = hdcp->cpu_transcoder; int ret; - WARN_ON(!(I915_READ(HDCP2_STATUS_DDI(port)) & LINK_ENCRYPTION_STATUS)); + WARN_ON(!(I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, port)) & + LINK_ENCRYPTION_STATUS)); - I915_WRITE(HDCP2_CTL_DDI(port), - I915_READ(HDCP2_CTL_DDI(port)) & ~CTL_LINK_ENCRYPTION_REQ); + I915_WRITE(HDCP2_CTL(dev_priv, cpu_transcoder, port), + I915_READ(HDCP2_CTL(dev_priv, cpu_transcoder, port)) & + ~CTL_LINK_ENCRYPTION_REQ); - ret = intel_de_wait_for_clear(dev_priv, HDCP2_STATUS_DDI(port), + ret = intel_de_wait_for_clear(dev_priv, + HDCP2_STATUS(dev_priv, cpu_transcoder, + port), LINK_ENCRYPTION_STATUS, ENCRYPT_STATUS_CHANGE_TIMEOUT_MS); if (ret == -ETIMEDOUT) @@ -1632,9 +1677,11 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; enum port port = connector->encoder->port; + enum transcoder cpu_transcoder; int ret = 0; mutex_lock(&hdcp->mutex); + cpu_transcoder = hdcp->cpu_transcoder; /* hdcp2_check_link is expected only when HDCP2.2 is Enabled */ if (hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED || @@ -1643,9 +1690,10 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) goto out; } - if (WARN_ON(!intel_hdcp2_in_use(connector))) { + if (WARN_ON(!intel_hdcp2_in_use(dev_priv, cpu_transcoder, port))) { DRM_ERROR("HDCP2.2 link stopped the encryption, %x\n", - I915_READ(HDCP2_STATUS_DDI(port))); + I915_READ(HDCP2_STATUS(dev_priv, cpu_transcoder, + port))); ret = -ENXIO; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; schedule_work(&hdcp->prop_work); @@ -1749,13 +1797,71 @@ static const struct component_ops i915_hdcp_component_ops = { .unbind = i915_hdcp_component_unbind, }; +static inline +enum mei_fw_ddi intel_get_mei_fw_ddi_index(enum port port) +{ + switch (port) { + case PORT_A: + return MEI_DDI_A; + case PORT_B ... PORT_F: + return (enum mei_fw_ddi)port; + default: + return MEI_DDI_INVALID_PORT; + } +} + +static inline +enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder) +{ + switch (cpu_transcoder) { + case TRANSCODER_A ... 
TRANSCODER_D: + return (enum mei_fw_tc)(cpu_transcoder | 0x10); + default: /* eDP, DSI TRANSCODERS are non HDCP capable */ + return MEI_INVALID_TRANSCODER; + } +} + +void intel_hdcp_transcoder_config(struct intel_connector *connector, + enum transcoder cpu_transcoder) +{ + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_hdcp *hdcp = &connector->hdcp; + + if (!hdcp->shim) + return; + + if (INTEL_GEN(dev_priv) >= 12) { + mutex_lock(&hdcp->mutex); + hdcp->cpu_transcoder = cpu_transcoder; + hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder); + mutex_unlock(&hdcp->mutex); + } +} + static inline int initialize_hdcp_port_data(struct intel_connector *connector, const struct intel_hdcp_shim *shim) { + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); struct intel_hdcp *hdcp = &connector->hdcp; struct hdcp_port_data *data = &hdcp->port_data; - data->port = connector->encoder->port; + if (INTEL_GEN(dev_priv) < 12) + data->fw_ddi = + intel_get_mei_fw_ddi_index(connector->encoder->port); + else + /* + * As per ME FW API expectation, for GEN 12+, fw_ddi is filled + * with zero(INVALID PORT index). + */ + data->fw_ddi = MEI_DDI_INVALID_PORT; + + /* + * As associated transcoder is set and modified at modeset, here fw_tc + * is initialized to zero (invalid transcoder index). This will be + * retained for <Gen12 forever. + */ + data->fw_tc = MEI_INVALID_TRANSCODER; + data->port_type = (u8)HDCP_PORT_TYPE_INTEGRATED; data->protocol = (u8)shim->protocol; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h index 13555b054930..41c1053d9e38 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp.h @@ -15,10 +15,14 @@ struct drm_connector_state; struct drm_i915_private; struct intel_connector; struct intel_hdcp_shim; +enum port; +enum transcoder; void intel_hdcp_atomic_check(struct drm_connector *connector, struct drm_connector_state *old_state, struct drm_connector_state *new_state); +void intel_hdcp_transcoder_config(struct intel_connector *connector, + enum transcoder cpu_transcoder); int intel_hdcp_init(struct intel_connector *connector, const struct intel_hdcp_shim *hdcp_shim); int intel_hdcp_enable(struct intel_connector *connector, u8 content_type); diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 0f5a0c618e46..b54ccbb5aad5 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -189,13 +189,19 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv, } } -static int hsw_dip_data_size(unsigned int type) +static int hsw_dip_data_size(struct drm_i915_private *dev_priv, + unsigned int type) { switch (type) { case DP_SDP_VSC: return VIDEO_DIP_VSC_DATA_SIZE; case DP_SDP_PPS: return VIDEO_DIP_PPS_DATA_SIZE; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + if (INTEL_GEN(dev_priv) >= 11) + return VIDEO_DIP_GMP_DATA_SIZE; + else + return VIDEO_DIP_DATA_SIZE; default: return VIDEO_DIP_DATA_SIZE; } @@ -514,7 +520,9 @@ static void hsw_write_infoframe(struct intel_encoder *encoder, int i; u32 val = I915_READ(ctl_reg); - data_size = hsw_dip_data_size(type); + data_size = hsw_dip_data_size(dev_priv, type); + + WARN_ON(len > data_size); val &= ~hsw_infoframe_enable(type); I915_WRITE(ctl_reg, val); @@ -724,11 +732,20 @@ intel_hdmi_compute_avi_infoframe(struct intel_encoder *encoder, drm_hdmi_avi_infoframe_colorspace(frame, conn_state); - drm_hdmi_avi_infoframe_quant_range(frame, 
connector, - adjusted_mode, - crtc_state->limited_color_range ? - HDMI_QUANTIZATION_RANGE_LIMITED : - HDMI_QUANTIZATION_RANGE_FULL); + /* nonsense combination */ + WARN_ON(crtc_state->limited_color_range && + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB); + + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_RGB) { + drm_hdmi_avi_infoframe_quant_range(frame, connector, + adjusted_mode, + crtc_state->limited_color_range ? + HDMI_QUANTIZATION_RANGE_LIMITED : + HDMI_QUANTIZATION_RANGE_FULL); + } else { + frame->quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT; + frame->ycc_quantization_range = HDMI_YCC_QUANTIZATION_RANGE_LIMITED; + } drm_hdmi_avi_infoframe_content_type(frame, conn_state); @@ -1491,7 +1508,10 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) { struct drm_i915_private *dev_priv = intel_dig_port->base.base.dev->dev_private; + struct intel_connector *connector = + intel_dig_port->hdmi.attached_connector; enum port port = intel_dig_port->base.port; + enum transcoder cpu_transcoder = connector->hdcp.cpu_transcoder; int ret; union { u32 reg; @@ -1502,39 +1522,30 @@ bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) if (ret) return false; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + I915_WRITE(HDCP_RPRIME(dev_priv, cpu_transcoder, port), ri.reg); /* Wait for Ri prime match */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + if (wait_for(I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, port)) & (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n", - I915_READ(PORT_HDCP_STATUS(port))); + I915_READ(HDCP_STATUS(dev_priv, cpu_transcoder, + port))); return false; } return true; } -struct hdcp2_hdmi_msg_data { +struct hdcp2_hdmi_msg_timeout { u8 msg_id; - u32 timeout; - u32 timeout2; + u16 timeout; }; -static const struct hdcp2_hdmi_msg_data hdcp2_msg_data[] = { - { HDCP_2_2_AKE_INIT, 0, 0 }, - { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, 0 }, - { HDCP_2_2_AKE_NO_STORED_KM, 0, 0 }, - { HDCP_2_2_AKE_STORED_KM, 0, 0 }, - { HDCP_2_2_AKE_SEND_HPRIME, HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS, - HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS }, - { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, 0 }, - { HDCP_2_2_LC_INIT, 0, 0 }, - { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, 0 }, - { HDCP_2_2_SKE_SEND_EKS, 0, 0 }, - { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, 0 }, - { HDCP_2_2_REP_SEND_ACK, 0, 0 }, - { HDCP_2_2_REP_STREAM_MANAGE, 0, 0 }, - { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, 0 }, +static const struct hdcp2_hdmi_msg_timeout hdcp2_msg_timeout[] = { + { HDCP_2_2_AKE_SEND_CERT, HDCP_2_2_CERT_TIMEOUT_MS, }, + { HDCP_2_2_AKE_SEND_PAIRING_INFO, HDCP_2_2_PAIRING_TIMEOUT_MS, }, + { HDCP_2_2_LC_SEND_LPRIME, HDCP_2_2_HDMI_LPRIME_TIMEOUT_MS, }, + { HDCP_2_2_REP_SEND_RECVID_LIST, HDCP_2_2_RECVID_LIST_TIMEOUT_MS, }, + { HDCP_2_2_REP_STREAM_READY, HDCP_2_2_STREAM_READY_TIMEOUT_MS, }, }; static @@ -1551,12 +1562,17 @@ static int get_hdcp2_msg_timeout(u8 msg_id, bool is_paired) { int i; - for (i = 0; i < ARRAY_SIZE(hdcp2_msg_data); i++) - if (hdcp2_msg_data[i].msg_id == msg_id && - (msg_id != HDCP_2_2_AKE_SEND_HPRIME || is_paired)) - return hdcp2_msg_data[i].timeout; - else if (hdcp2_msg_data[i].msg_id == msg_id) - return hdcp2_msg_data[i].timeout2; + if (msg_id == HDCP_2_2_AKE_SEND_HPRIME) { + if (is_paired) + return HDCP_2_2_HPRIME_PAIRED_TIMEOUT_MS; + else + return HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS; + } + + for (i 
= 0; i < ARRAY_SIZE(hdcp2_msg_timeout); i++) { + if (hdcp2_msg_timeout[i].msg_id == msg_id) + return hdcp2_msg_timeout[i].timeout; + } return -EINVAL; } @@ -2184,8 +2200,10 @@ intel_hdmi_mode_valid(struct drm_connector *connector, status = hdmi_port_clock_valid(hdmi, clock * 5 / 4, true, force_dvi); } + if (status != MODE_OK) + return status; - return status; + return intel_mode_valid_max_plane_size(dev_priv, mode); } static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, @@ -2261,9 +2279,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state, static bool intel_hdmi_ycbcr420_config(struct drm_connector *connector, - struct intel_crtc_state *config, - int *clock_12bpc, int *clock_10bpc, - int *clock_8bpc) + struct intel_crtc_state *config) { struct intel_crtc *intel_crtc = to_intel_crtc(config->base.crtc); @@ -2272,11 +2288,6 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return false; } - /* YCBCR420 TMDS rate requirement is half the pixel clock */ - config->port_clock /= 2; - *clock_12bpc /= 2; - *clock_10bpc /= 2; - *clock_8bpc /= 2; config->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; /* YCBCR 420 output conversion needs a scaler */ @@ -2291,6 +2302,104 @@ intel_hdmi_ycbcr420_config(struct drm_connector *connector, return true; } +static int intel_hdmi_port_clock(int clock, int bpc) +{ + /* + * Need to adjust the port link by: + * 1.5x for 12bpc + * 1.25x for 10bpc + */ + return clock * bpc / 8; +} + +static int intel_hdmi_compute_bpc(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + int clock, bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + int bpc; + + for (bpc = 12; bpc >= 10; bpc -= 2) { + if (hdmi_deep_color_possible(crtc_state, bpc) && + hdmi_port_clock_valid(intel_hdmi, + intel_hdmi_port_clock(clock, bpc), + true, force_dvi) == MODE_OK) + return bpc; + } + + return 8; +} + +static int intel_hdmi_compute_clock(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + bool force_dvi) +{ + struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + int bpc, clock = adjusted_mode->crtc_clock; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) + clock *= 2; + + /* YCBCR420 TMDS rate requirement is half the pixel clock */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + clock /= 2; + + bpc = intel_hdmi_compute_bpc(encoder, crtc_state, + clock, force_dvi); + + crtc_state->port_clock = intel_hdmi_port_clock(clock, bpc); + + /* + * pipe_bpp could already be below 8bpc due to + * FDI bandwidth constraints. We shouldn't bump it + * back up to 8bpc in that case. 
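 * For example, an FDI-limited pipe_bpp of 18 (6 bpc per channel) stays
 * at 18 when 8 bpc is picked here (18 < 8 * 3), whereas a 36 bpp pipe
 * that only has clock budget for 10 bpc is clamped down to 30. The port
 * clock above scales as clock * bpc / 8, i.e. 1.25x for 10 bpc and 1.5x
 * for 12 bpc (148500 kHz becomes 185625 or 222750 kHz respectively).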
+ */ + if (crtc_state->pipe_bpp > bpc * 3) + crtc_state->pipe_bpp = bpc * 3; + + DRM_DEBUG_KMS("picking %d bpc for HDMI output (pipe bpp: %d)\n", + bpc, crtc_state->pipe_bpp); + + if (hdmi_port_clock_valid(intel_hdmi, crtc_state->port_clock, + false, force_dvi) != MODE_OK) { + DRM_DEBUG_KMS("unsupported HDMI clock (%d kHz), rejecting mode\n", + crtc_state->port_clock); + return -EINVAL; + } + + return 0; +} + +static bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) +{ + const struct intel_digital_connector_state *intel_conn_state = + to_intel_digital_connector_state(conn_state); + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + + /* + * Our YCbCr output is always limited range. + * crtc_state->limited_color_range only applies to RGB, + * and it must never be set for YCbCr or we risk setting + * some conflicting bits in PIPECONF which will mess up + * the colors on the monitor. + */ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB) + return false; + + if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { + /* See CEA-861-E - 5.1 Default Encoding Parameters */ + return crtc_state->has_hdmi_sink && + drm_default_rgb_quant_range(adjusted_mode) == + HDMI_QUANTIZATION_RANGE_LIMITED; + } else { + return intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; + } +} + int intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -2302,11 +2411,8 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_scdc *scdc = &connector->display_info.hdmi.scdc; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock; - int clock_10bpc = clock_8bpc * 5 / 4; - int clock_12bpc = clock_8bpc * 3 / 2; - int desired_bpp; bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; + int ret; if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return -EINVAL; @@ -2317,33 +2423,19 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, if (pipe_config->has_hdmi_sink) pipe_config->has_infoframe = true; - if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { - /* See CEA-861-E - 5.1 Default Encoding Parameters */ - pipe_config->limited_color_range = - pipe_config->has_hdmi_sink && - drm_default_rgb_quant_range(adjusted_mode) == - HDMI_QUANTIZATION_RANGE_LIMITED; - } else { - pipe_config->limited_color_range = - intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; - } - - if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) { + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) pipe_config->pixel_multiplier = 2; - clock_8bpc *= 2; - clock_10bpc *= 2; - clock_12bpc *= 2; - } if (drm_mode_is_420_only(&connector->display_info, adjusted_mode)) { - if (!intel_hdmi_ycbcr420_config(connector, pipe_config, - &clock_12bpc, &clock_10bpc, - &clock_8bpc)) { + if (!intel_hdmi_ycbcr420_config(connector, pipe_config)) { DRM_ERROR("Can't support YCBCR420 output\n"); return -EINVAL; } } + pipe_config->limited_color_range = + intel_hdmi_limited_color_range(pipe_config, conn_state); + if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv)) pipe_config->has_pch_encoder = true; @@ -2355,43 +2447,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, intel_conn_state->force_audio == HDMI_AUDIO_ON; } - /* - * Note that g4x/vlv don't support 12bpc hdmi outputs. 
We also need - * to check that the higher clock still fits within limits. - */ - if (hdmi_deep_color_possible(pipe_config, 12) && - hdmi_port_clock_valid(intel_hdmi, clock_12bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); - desired_bpp = 12*3; - - /* Need to adjust the port link by 1.5x for 12bpc. */ - pipe_config->port_clock = clock_12bpc; - } else if (hdmi_deep_color_possible(pipe_config, 10) && - hdmi_port_clock_valid(intel_hdmi, clock_10bpc, - true, force_dvi) == MODE_OK) { - DRM_DEBUG_KMS("picking bpc to 10 for HDMI output\n"); - desired_bpp = 10 * 3; - - /* Need to adjust the port link by 1.25x for 10bpc. */ - pipe_config->port_clock = clock_10bpc; - } else { - DRM_DEBUG_KMS("picking bpc to 8 for HDMI output\n"); - desired_bpp = 8*3; - - pipe_config->port_clock = clock_8bpc; - } - - if (!pipe_config->bw_constrained) { - DRM_DEBUG_KMS("forcing pipe bpp to %i for HDMI\n", desired_bpp); - pipe_config->pipe_bpp = desired_bpp; - } - - if (hdmi_port_clock_valid(intel_hdmi, pipe_config->port_clock, - false, force_dvi) != MODE_OK) { - DRM_DEBUG_KMS("unsupported HDMI clock, rejecting mode\n"); - return -EINVAL; - } + ret = intel_hdmi_compute_clock(encoder, pipe_config, force_dvi); + if (ret) + return ret; /* Set user selected PAR to incoming mode's member */ adjusted_mode->picture_aspect_ratio = conn_state->picture_aspect_ratio; @@ -2431,6 +2489,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder, return -EINVAL; } + intel_hdcp_transcoder_config(intel_hdmi->attached_connector, + pipe_config->cpu_transcoder); + return 0; } @@ -3002,7 +3063,7 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv, if (HAS_PCH_MCC(dev_priv)) ddc_pin = mcc_port_to_ddc_pin(dev_priv, port); - else if (HAS_PCH_TGP(dev_priv) || HAS_PCH_ICP(dev_priv)) + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) ddc_pin = icl_port_to_ddc_pin(dev_priv, port); else if (HAS_PCH_CNP(dev_priv)) ddc_pin = cnp_port_to_ddc_pin(dev_priv, port); @@ -3070,12 +3131,13 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, enum port port = intel_encoder->port; struct cec_connector_info conn_info; - DRM_DEBUG_KMS("Adding HDMI connector on port %c\n", - port_name(port)); + DRM_DEBUG_KMS("Adding HDMI connector on [ENCODER:%d:%s]\n", + intel_encoder->base.base.id, intel_encoder->base.name); if (WARN(intel_dig_port->max_lanes < 4, - "Not enough lanes (%d) for HDMI on port %c\n", - intel_dig_port->max_lanes, port_name(port))) + "Not enough lanes (%d) for HDMI on [ENCODER:%d:%s]\n", + intel_dig_port->max_lanes, intel_encoder->base.base.id, + intel_encoder->base.name)) return; drm_connector_init(dev, connector, &intel_hdmi_connector_funcs, @@ -3215,11 +3277,11 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) - intel_encoder->crtc_mask = 1 << 2; + intel_encoder->crtc_mask = BIT(PIPE_C); else - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); } else { - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); } intel_encoder->cloneable = 1 << INTEL_OUTPUT_ANALOG; /* diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h index 106c2e0bc3c9..cf1ea5427639 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.h +++ b/drivers/gpu/drm/i915/display/intel_hdmi.h @@ -23,6 +23,7 @@ struct intel_crtc_state; struct 
intel_hdmi; struct drm_connector_state; union hdmi_infoframe; +enum port; void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, enum port port); diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c index 56be20f6f47e..fc29046d48ea 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.c +++ b/drivers/gpu/drm/i915/display/intel_hotplug.c @@ -481,7 +481,8 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, long_hpd = long_mask & BIT(pin); - DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), + DRM_DEBUG_DRIVER("digital hpd on [ENCODER:%d:%s] - %s\n", + encoder->base.base.id, encoder->base.name, long_hpd ? "long" : "short"); queue_dig = true; diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h index b0cd447b7fbc..087b5f57b321 100644 --- a/drivers/gpu/drm/i915/display/intel_hotplug.h +++ b/drivers/gpu/drm/i915/display/intel_hotplug.h @@ -13,6 +13,7 @@ struct drm_i915_private; struct intel_connector; struct intel_encoder; +enum port; void intel_hpd_poll_init(struct drm_i915_private *dev_priv); enum intel_hotplug_state intel_encoder_hotplug(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c index b19800b58442..0b67f7887cd0 100644 --- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c @@ -114,7 +114,7 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv) pinfo.size_data = sizeof(*pdata); pinfo.dma_mask = DMA_BIT_MASK(32); - pdata->num_pipes = INTEL_INFO(dev_priv)->num_pipes; + pdata->num_pipes = INTEL_NUM_PIPES(dev_priv); pdata->num_ports = IS_CHERRYVIEW(dev_priv) ? 
3 : 2; /* B,C,D or B,C */ pdata->port[0].pipe = -1; pdata->port[1].pipe = -1; diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index b7c459a8931c..13841d7c455b 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -232,7 +232,7 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int pipe = crtc->pipe; + enum pipe pipe = crtc->pipe; u32 temp; if (HAS_PCH_SPLIT(dev_priv)) { @@ -900,11 +900,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); else if (IS_GEN(dev_priv, 4)) - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); else - intel_encoder->crtc_mask = (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_B); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 29edfc343716..2360f19f9694 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *)) if (IS_ERR(rq)) return rq; - err = i915_active_ref(&overlay->last_flip, rq->timeline, rq); + err = i915_active_add_request(&overlay->last_flip, rq); if (err) { i915_request_add(rq); return ERR_PTR(err); @@ -439,8 +439,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) struct i915_request *rq; u32 *cs; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* * Only wait if there is actually an old frame to release to * guarantee forward progress. 
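The crtc_mask conversions in the hunks above (intel_hdmi, intel_lvds, and later intel_sdvo/intel_tv) replace open-coded pipe bitmasks with BIT(PIPE_*). A minimal standalone sketch — assuming the usual i915 enum pipe numbering (PIPE_A = 0, PIPE_B = 1, PIPE_C = 2) and the kernel's BIT() macro, both re-declared locally here for illustration only — shows the two spellings produce identical masks, i.e. the conversion is purely cosmetic:

/* Illustrative sketch only: local stand-ins for the kernel definitions. */
enum pipe { PIPE_A = 0, PIPE_B, PIPE_C };
#define BIT(nr) (1UL << (nr))

_Static_assert(BIT(PIPE_C) == (1 << 2), "single-pipe mask unchanged");
_Static_assert((BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C)) ==
	       ((1 << 0) | (1 << 1) | (1 << 2)), "three-pipe mask unchanged");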
@@ -751,7 +749,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct i915_vma *vma; int ret, tmp_width; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); @@ -852,7 +849,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_recover_from_interrupt(overlay); @@ -1068,11 +1064,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (!(params->flags & I915_OVERLAY_ENABLE)) { drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); - ret = intel_overlay_switch_off(overlay); - - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1088,7 +1080,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, return -ENOENT; drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); @@ -1152,14 +1143,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (ret != 0) goto out_unlock; - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); return 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); @@ -1233,7 +1222,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, } drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); ret = -EINVAL; if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) { @@ -1290,7 +1278,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, ret = 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1303,15 +1290,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) struct i915_vma *vma; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); - if (obj == NULL) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1332,13 +1315,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; - mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1367,8 +1347,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - i915_active_init(dev_priv, - &overlay->last_flip, + i915_active_init(&overlay->last_flip, NULL, intel_overlay_last_flip_retire); ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 3bfb720560c2..50f22abcd30e 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -88,48 +88,35 @@ static bool intel_psr2_enabled(struct drm_i915_private *dev_priv, } } -static int edp_psr_shift(enum transcoder cpu_transcoder) +static void psr_irq_control(struct drm_i915_private *dev_priv) { - switch (cpu_transcoder) { - 
case TRANSCODER_A: - return EDP_PSR_TRANSCODER_A_SHIFT; - case TRANSCODER_B: - return EDP_PSR_TRANSCODER_B_SHIFT; - case TRANSCODER_C: - return EDP_PSR_TRANSCODER_C_SHIFT; - default: - MISSING_CASE(cpu_transcoder); - /* fallthrough */ - case TRANSCODER_EDP: - return EDP_PSR_TRANSCODER_EDP_SHIFT; - } -} - -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug) -{ - u32 debug_mask, mask; - enum transcoder cpu_transcoder; - u32 transcoders = BIT(TRANSCODER_EDP); + enum transcoder trans_shift; + u32 mask, val; + i915_reg_t imr_reg; - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - - debug_mask = 0; - mask = 0; - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); - - mask |= EDP_PSR_ERROR(shift); - debug_mask |= EDP_PSR_POST_EXIT(shift) | - EDP_PSR_PRE_ENTRY(shift); + /* + * gen12+ has registers relative to transcoder and one per transcoder + * using the same bit definition: handle it as TRANSCODER_EDP to force + * 0 shift in bit definition + */ + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; } - if (debug & I915_PSR_DEBUG_IRQ) - mask |= debug_mask; + mask = EDP_PSR_ERROR(trans_shift); + if (dev_priv->psr.debug & I915_PSR_DEBUG_IRQ) + mask |= EDP_PSR_POST_EXIT(trans_shift) | + EDP_PSR_PRE_ENTRY(trans_shift); - I915_WRITE(EDP_PSR_IMR, ~mask); + /* Warning: it is masking/setting reserved bits too */ + val = I915_READ(imr_reg); + val &= ~EDP_PSR_TRANS_MASK(trans_shift); + val |= ~mask; + I915_WRITE(imr_reg, val); } static void psr_event_print(u32 val, bool psr2_enabled) @@ -171,60 +158,58 @@ static void psr_event_print(u32 val, bool psr2_enabled) void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) { - u32 transcoders = BIT(TRANSCODER_EDP); - enum transcoder cpu_transcoder; + enum transcoder cpu_transcoder = dev_priv->psr.transcoder; + enum transcoder trans_shift; + i915_reg_t imr_reg; ktime_t time_ns = ktime_get(); - u32 mask = 0; - if (INTEL_GEN(dev_priv) >= 8) - transcoders |= BIT(TRANSCODER_A) | - BIT(TRANSCODER_B) | - BIT(TRANSCODER_C); - - for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { - int shift = edp_psr_shift(cpu_transcoder); + if (INTEL_GEN(dev_priv) >= 12) { + trans_shift = 0; + imr_reg = TRANS_PSR_IMR(dev_priv->psr.transcoder); + } else { + trans_shift = dev_priv->psr.transcoder; + imr_reg = EDP_PSR_IMR; + } - if (psr_iir & EDP_PSR_ERROR(shift)) { - DRM_WARN("[transcoder %s] PSR aux error\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_PRE_ENTRY(trans_shift)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } - dev_priv->psr.irq_aux_error = true; + if (psr_iir & EDP_PSR_POST_EXIT(trans_shift)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); - /* - * If this interruption is not masked it will keep - * interrupting so fast that it prevents the scheduled - * work to run. - * Also after a PSR error, we don't want to arm PSR - * again so we don't care about unmask the interruption - * or unset irq_aux_error. 
- */ - mask |= EDP_PSR_ERROR(shift); - } + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; - if (psr_iir & EDP_PSR_PRE_ENTRY(shift)) { - dev_priv->psr.last_entry_attempt = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", - transcoder_name(cpu_transcoder)); + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); } + } - if (psr_iir & EDP_PSR_POST_EXIT(shift)) { - dev_priv->psr.last_exit = time_ns; - DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", - transcoder_name(cpu_transcoder)); + if (psr_iir & EDP_PSR_ERROR(trans_shift)) { + u32 val; - if (INTEL_GEN(dev_priv) >= 9) { - u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); - bool psr2_enabled = dev_priv->psr.psr2_enabled; + DRM_WARN("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); - I915_WRITE(PSR_EVENT(cpu_transcoder), val); - psr_event_print(val, psr2_enabled); - } - } - } + dev_priv->psr.irq_aux_error = true; - if (mask) { - mask |= I915_READ(EDP_PSR_IMR); - I915_WRITE(EDP_PSR_IMR, mask); + /* + * If this interruption is not masked it will keep + * interrupting so fast that it prevents the scheduled + * work to run. + * Also after a PSR error, we don't want to arm PSR + * again so we don't care about unmask the interruption + * or unset irq_aux_error. + */ + val = I915_READ(imr_reg); + val |= EDP_PSR_ERROR(trans_shift); + I915_WRITE(imr_reg, val); schedule_work(&dev_priv->psr.work); } @@ -283,6 +268,11 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dp_to_dig_port(intel_dp)->base.base.dev); + if (dev_priv->psr.dp) { + DRM_WARN("More than one eDP panel found, PSR support should be extended\n"); + return; + } + drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, sizeof(intel_dp->psr_dpcd)); @@ -305,7 +295,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) dev_priv->psr.sink_sync_latency = intel_dp_get_sink_sync_latency(intel_dp); - WARN_ON(dev_priv->psr.dp); dev_priv->psr.dp = intel_dp; if (INTEL_GEN(dev_priv) >= 9 && @@ -390,7 +379,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) BUILD_BUG_ON(sizeof(aux_msg) > 20); for (i = 0; i < sizeof(aux_msg); i += 4) - I915_WRITE(EDP_PSR_AUX_DATA(i >> 2), + I915_WRITE(EDP_PSR_AUX_DATA(dev_priv->psr.transcoder, i >> 2), intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); @@ -401,7 +390,7 @@ static void hsw_psr_setup_aux(struct intel_dp *intel_dp) /* Select only valid bits for SRD_AUX_CTL */ aux_ctl &= psr_aux_mask; - I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); + I915_WRITE(EDP_PSR_AUX_CTL(dev_priv->psr.transcoder), aux_ctl); } static void intel_psr_enable_sink(struct intel_dp *intel_dp) @@ -491,8 +480,9 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) if (INTEL_GEN(dev_priv) >= 8) val |= EDP_PSR_CRC_ENABLE; - val |= I915_READ(EDP_PSR_CTL) & EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK; - I915_WRITE(EDP_PSR_CTL, val); + val |= (I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & + EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } static void hsw_activate_psr2(struct intel_dp *intel_dp) @@ -528,9 +518,87 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * PSR2 HW is incorrectly using EDP_PSR_TP1_TP3_SEL and BSpec is * recommending keep this bit unset while PSR2 is enabled. 
*/ - I915_WRITE(EDP_PSR_CTL, 0); + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), 0); - I915_WRITE(EDP_PSR2_CTL, val); + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static bool +transcoder_has_psr2(struct drm_i915_private *dev_priv, enum transcoder trans) +{ + if (INTEL_GEN(dev_priv) < 9) + return false; + else if (INTEL_GEN(dev_priv) >= 12) + return trans == TRANSCODER_A; + else + return trans == TRANSCODER_EDP; +} + +static u32 intel_get_frame_time_us(const struct intel_crtc_state *cstate) +{ + if (!cstate || !cstate->base.active) + return 0; + + return DIV_ROUND_UP(1000 * 1000, + drm_mode_vrefresh(&cstate->base.adjusted_mode)); +} + +static void psr2_program_idle_frames(struct drm_i915_private *dev_priv, + u32 idle_frames) +{ + u32 val; + + idle_frames <<= EDP_PSR2_IDLE_FRAME_SHIFT; + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + val &= ~EDP_PSR2_IDLE_FRAME_MASK; + val |= idle_frames; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); +} + +static void tgl_psr2_enable_dc3co(struct drm_i915_private *dev_priv) +{ + psr2_program_idle_frames(dev_priv, 0); + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_DC3CO); +} + +static void tgl_psr2_disable_dc3co(struct drm_i915_private *dev_priv) +{ + int idle_frames; + + intel_display_power_set_target_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); + /* + * Restore PSR2 idle frame let's use 6 as the minimum to cover all known + * cases including the off-by-one issue that HW has in some cases. + */ + idle_frames = max(6, dev_priv->vbt.psr.idle_frames); + idle_frames = max(idle_frames, dev_priv->psr.sink_sync_latency + 1); + psr2_program_idle_frames(dev_priv, idle_frames); +} + +static void tgl_dc5_idle_thread(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.idle_work.work); + + mutex_lock(&dev_priv->psr.lock); + /* If delayed work is pending, it is not idle */ + if (delayed_work_pending(&dev_priv->psr.idle_work)) + goto unlock; + + DRM_DEBUG_KMS("DC5/6 idle thread\n"); + tgl_psr2_disable_dc3co(dev_priv); +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + +static void tgl_disallow_dc3co_on_psr2_exit(struct drm_i915_private *dev_priv) +{ + if (!dev_priv->psr.dc3co_enabled) + return; + + cancel_delayed_work(&dev_priv->psr.idle_work); + /* Before PSR2 exit disallow dc3co*/ + tgl_psr2_disable_dc3co(dev_priv); } static bool intel_psr2_config_valid(struct intel_dp *intel_dp, @@ -544,6 +612,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, if (!dev_priv->psr.sink_psr2_support) return false; + if (!transcoder_has_psr2(dev_priv, crtc_state->cpu_transcoder)) { + DRM_DEBUG_KMS("PSR2 not supported in transcoder %s\n", + transcoder_name(crtc_state->cpu_transcoder)); + return false; + } + /* * DSC and PSR2 cannot be enabled simultaneously. If a requested * resolution requires DSC to be enabled, priority is given to DSC @@ -554,7 +628,10 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 12) { + psr_max_h = 5120; + psr_max_v = 3200; + } else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { psr_max_h = 4096; psr_max_v = 2304; } else if (IS_GEN(dev_priv, 9)) { @@ -606,10 +683,9 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, /* * HSW spec explicitly says PSR is tied to port A. 
- * BDW+ platforms with DDI implementation of PSR have different - * PSR registers per transcoder and we only implement transcoder EDP - * ones. Since by Display design transcoder EDP is tied to port A - * we can safely escape based on the port A. + * BDW+ platforms have a instance of PSR registers per transcoder but + * for now it only supports one instance of PSR, so lets keep it + * hardcoded to PORT_A */ if (dig_port->base.port != PORT_A) { DRM_DEBUG_KMS("PSR condition failed: Port not supported\n"); @@ -648,9 +724,10 @@ static void intel_psr_activate(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) + WARN_ON(I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)) & EDP_PSR2_ENABLE); + + WARN_ON(I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)) & EDP_PSR_ENABLE); WARN_ON(dev_priv->psr.active); lockdep_assert_held(&dev_priv->psr.lock); @@ -720,19 +797,46 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, if (INTEL_GEN(dev_priv) < 11) mask |= EDP_PSR_DEBUG_MASK_DISP_REG_WRITE; - I915_WRITE(EDP_PSR_DEBUG, mask); + I915_WRITE(EDP_PSR_DEBUG(dev_priv->psr.transcoder), mask); + + psr_irq_control(dev_priv); } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, const struct intel_crtc_state *crtc_state) { struct intel_dp *intel_dp = dev_priv->psr.dp; + u32 val; WARN_ON(dev_priv->psr.enabled); dev_priv->psr.psr2_enabled = intel_psr2_enabled(dev_priv, crtc_state); dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; + dev_priv->psr.dc3co_enabled = !!crtc_state->dc3co_exitline; + dev_priv->psr.dc3co_exit_delay = intel_get_frame_time_us(crtc_state); + dev_priv->psr.transcoder = crtc_state->cpu_transcoder; + + /* + * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR + * will still keep the error set even after the reset done in the + * irq_preinstall and irq_uninstall hooks. + * And enabling in this situation cause the screen to freeze in the + * first time that PSR HW tries to activate so lets keep PSR disabled + * to avoid any rendering problems. + */ + if (INTEL_GEN(dev_priv) >= 12) { + val = I915_READ(TRANS_PSR_IIR(dev_priv->psr.transcoder)); + val &= EDP_PSR_ERROR(0); + } else { + val = I915_READ(EDP_PSR_IIR); + val &= EDP_PSR_ERROR(dev_priv->psr.transcoder); + } + if (val) { + dev_priv->psr.sink_not_reliable = true; + DRM_DEBUG_KMS("PSR interruption error set, not enabling PSR\n"); + return; + } DRM_DEBUG_KMS("Enabling PSR%s\n", dev_priv->psr.psr2_enabled ? 
"2" : "1"); @@ -782,20 +886,28 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) u32 val; if (!dev_priv->psr.active) { - if (INTEL_GEN(dev_priv) >= 9) - WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); - WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); + if (transcoder_has_psr2(dev_priv, dev_priv->psr.transcoder)) { + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR2_ENABLE); + } + + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); + WARN_ON(val & EDP_PSR_ENABLE); + return; } if (dev_priv->psr.psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + tgl_disallow_dc3co_on_psr2_exit(dev_priv); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR2_ENABLE)); - I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); + val &= ~EDP_PSR2_ENABLE; + I915_WRITE(EDP_PSR2_CTL(dev_priv->psr.transcoder), val); } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); WARN_ON(!(val & EDP_PSR_ENABLE)); - I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE); + val &= ~EDP_PSR_ENABLE; + I915_WRITE(EDP_PSR_CTL(dev_priv->psr.transcoder), val); } dev_priv->psr.active = false; } @@ -817,10 +929,10 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp) intel_psr_exit(dev_priv); if (dev_priv->psr.psr2_enabled) { - psr_status = EDP_PSR2_STATUS; + psr_status = EDP_PSR2_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; } else { - psr_status = EDP_PSR_STATUS; + psr_status = EDP_PSR_STATUS(dev_priv->psr.transcoder); psr_status_mask = EDP_PSR_STATUS_STATE_MASK; } @@ -859,6 +971,7 @@ void intel_psr_disable(struct intel_dp *intel_dp, mutex_unlock(&dev_priv->psr.lock); cancel_work_sync(&dev_priv->psr.work); + cancel_delayed_work_sync(&dev_priv->psr.idle_work); } static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) @@ -963,7 +1076,8 @@ int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, * defensive enough to cover everything. */ - return __intel_wait_for_register(&dev_priv->uncore, EDP_PSR_STATUS, + return __intel_wait_for_register(&dev_priv->uncore, + EDP_PSR_STATUS(dev_priv->psr.transcoder), EDP_PSR_STATUS_STATE_MASK, EDP_PSR_STATUS_STATE_IDLE, 2, 50, out_value); @@ -979,10 +1093,10 @@ static bool __psr_wait_for_idle_locked(struct drm_i915_private *dev_priv) return false; if (dev_priv->psr.psr2_enabled) { - reg = EDP_PSR2_STATUS; + reg = EDP_PSR2_STATUS(dev_priv->psr.transcoder); mask = EDP_PSR2_STATUS_STATE_MASK; } else { - reg = EDP_PSR_STATUS; + reg = EDP_PSR_STATUS(dev_priv->psr.transcoder); mask = EDP_PSR_STATUS_STATE_MASK; } @@ -1067,7 +1181,13 @@ int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 val) old_mode = dev_priv->psr.debug & I915_PSR_DEBUG_MODE_MASK; dev_priv->psr.debug = val; - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + + /* + * Do it right away if it's already enabled, otherwise it will be done + * when enabling the source. + */ + if (dev_priv->psr.enabled) + psr_irq_control(dev_priv); mutex_unlock(&dev_priv->psr.lock); @@ -1159,6 +1279,44 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->psr.lock); } +/* + * When we will be completely rely on PSR2 S/W tracking in future, + * intel_psr_flush() will invalidate and flush the PSR for ORIGIN_FLIP + * event also therefore tgl_dc3co_flush() require to be changed + * accrodingly in future. 
+ */ +static void +tgl_dc3co_flush(struct drm_i915_private *dev_priv, + unsigned int frontbuffer_bits, enum fb_op_origin origin) +{ + u32 delay; + + mutex_lock(&dev_priv->psr.lock); + + if (!dev_priv->psr.dc3co_enabled) + goto unlock; + + if (!dev_priv->psr.psr2_enabled || !dev_priv->psr.active) + goto unlock; + + /* + * At every frontbuffer flush flip event modified delay of delayed work, + * when delayed work schedules that means display has been idle. + */ + if (!(frontbuffer_bits & + INTEL_FRONTBUFFER_ALL_MASK(dev_priv->psr.pipe))) + goto unlock; + + tgl_psr2_enable_dc3co(dev_priv); + /* DC5/DC6 required idle frames = 6 */ + delay = 6 * dev_priv->psr.dc3co_exit_delay; + mod_delayed_work(system_wq, &dev_priv->psr.idle_work, + usecs_to_jiffies(delay)); + +unlock: + mutex_unlock(&dev_priv->psr.lock); +} + /** * intel_psr_flush - Flush PSR * @dev_priv: i915 device @@ -1178,8 +1336,10 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; - if (origin == ORIGIN_FLIP) + if (origin == ORIGIN_FLIP) { + tgl_dc3co_flush(dev_priv, frontbuffer_bits, origin); return; + } mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { @@ -1208,45 +1368,34 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, */ void intel_psr_init(struct drm_i915_private *dev_priv) { - u32 val; - if (!HAS_PSR(dev_priv)) return; - dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? - HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; - if (!dev_priv->psr.sink_support) return; + if (IS_HASWELL(dev_priv)) + /* + * HSW don't have PSR registers on the same space as transcoder + * so set this to a value that when subtract to the register + * in transcoder space results in the right offset for HSW + */ + dev_priv->hsw_psr_mmio_adjust = _SRD_CTL_EDP - _HSW_EDP_PSR_BASE; + if (i915_modparams.enable_psr == -1) if (INTEL_GEN(dev_priv) < 9 || !dev_priv->vbt.psr.enable) i915_modparams.enable_psr = 0; - /* - * If a PSR error happened and the driver is reloaded, the EDP_PSR_IIR - * will still keep the error set even after the reset done in the - * irq_preinstall and irq_uninstall hooks. - * And enabling in this situation cause the screen to freeze in the - * first time that PSR HW tries to activate so lets keep PSR disabled - * to avoid any rendering problems. - */ - val = I915_READ(EDP_PSR_IIR); - val &= EDP_PSR_ERROR(edp_psr_shift(TRANSCODER_EDP)); - if (val) { - DRM_DEBUG_KMS("PSR interruption error set\n"); - dev_priv->psr.sink_not_reliable = true; - } - /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) /* HSW and BDW require workarounds that we don't implement. 
*/ dev_priv->psr.link_standby = false; - else - /* For new platforms let's respect VBT back again */ + else if (INTEL_GEN(dev_priv) < 12) + /* For new platforms up to TGL let's respect VBT back again */ dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link; INIT_WORK(&dev_priv->psr.work, intel_psr_work); + INIT_DELAYED_WORK(&dev_priv->psr.idle_work, tgl_dc5_idle_thread); mutex_init(&dev_priv->psr.lock); } diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index dc818826f36d..46e4de8b8cd5 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -30,7 +30,6 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, void intel_psr_init(struct drm_i915_private *dev_priv); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); -void intel_psr_irq_control(struct drm_i915_private *dev_priv, u32 debug); void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); void intel_psr_short_pulse(struct intel_dp *intel_dp); int intel_psr_wait_for_idle(const struct intel_crtc_state *new_crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index adeb1c840976..47f5d87a938a 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2921,7 +2921,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) bytes[0], bytes[1]); return false; } - intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); + intel_sdvo->base.crtc_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.h b/drivers/gpu/drm/i915/display/intel_sdvo.h index c9e05bcdd141..a66f224aa17d 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.h +++ b/drivers/gpu/drm/i915/display/intel_sdvo.h @@ -14,6 +14,7 @@ struct drm_i915_private; enum pipe; +enum port; bool intel_sdvo_port_enabled(struct drm_i915_private *dev_priv, i915_reg_t sdvo_reg, enum pipe *pipe); diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index dea63be1964f..5ae12ab3c5b7 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -48,19 +48,6 @@ #include "intel_psr.h" #include "intel_sprite.h" -bool is_planar_yuv_format(u32 pixelformat) -{ - switch (pixelformat) { - case DRM_FORMAT_NV12: - case DRM_FORMAT_P010: - case DRM_FORMAT_P012: - case DRM_FORMAT_P016: - return true; - default: - return false; - } -} - int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs) { @@ -300,10 +287,8 @@ int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state) src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; + drm_rect_init(src, src_x << 16, src_y << 16, + src_w << 16, src_h << 16); if (!fb->format->is_yuv) return 0; @@ -361,6 +346,7 @@ skl_program_scaler(struct intel_plane *plane, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; enum pipe pipe = plane->pipe; int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler = @@ -381,7 +367,7 @@ skl_program_scaler(struct intel_plane *plane, 0, INT_MAX); /* TODO: handle sub-pixel coordinates */ - if 
(is_planar_yuv_format(plane_state->base.fb->format->format) && + if (drm_format_info_is_yuv_semiplanar(fb->format) && !icl_is_hdr_plane(dev_priv, plane->id)) { y_hphase = skl_scaler_calc_phase(1, hscale, false); y_vphase = skl_scaler_calc_phase(1, vscale, false); @@ -554,7 +540,7 @@ skl_program_plane(struct intel_plane *plane, u32 y = plane_state->color_plane[color_plane].y; u32 src_w = drm_rect_width(&plane_state->base.src) >> 16; u32 src_h = drm_rect_height(&plane_state->base.src) >> 16; - struct intel_plane *linked = plane_state->linked_plane; + struct intel_plane *linked = plane_state->planar_linked_plane; const struct drm_framebuffer *fb = plane_state->base.fb; u8 alpha = plane_state->base.alpha >> 8; u32 plane_color_ctl = 0; @@ -653,7 +639,7 @@ skl_update_plane(struct intel_plane *plane, { int color_plane = 0; - if (plane_state->linked_plane) { + if (plane_state->planar_linked_plane) { /* Program the UV plane */ color_plane = 1; } @@ -1528,6 +1514,7 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, int src_x, src_w, src_h, crtc_w, crtc_h; const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; + unsigned int stride = plane_state->color_plane[0].stride; unsigned int cpp = fb->format->cpp[0]; unsigned int width_bytes; int min_width, min_height; @@ -1569,9 +1556,9 @@ g4x_sprite_check_scaling(struct intel_crtc_state *crtc_state, return -EINVAL; } - if (width_bytes > 4096 || fb->pitches[0] > 4096) { + if (stride > 4096) { DRM_DEBUG_KMS("Stride (%u) exceeds hardware max with scaling (%u)\n", - fb->pitches[0], 4096); + stride, 4096); return -EINVAL; } @@ -1790,7 +1777,7 @@ static int skl_plane_check_nv12_rotation(const struct intel_plane_state *plane_s int src_w = drm_rect_width(&plane_state->base.src) >> 16; /* Display WA #1106 */ - if (is_planar_yuv_format(fb->format->format) && src_w & 3 && + if (drm_format_info_is_yuv_semiplanar(fb->format) && src_w & 3 && (rotation == DRM_MODE_ROTATE_270 || rotation == (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90))) { DRM_DEBUG_KMS("src width must be multiple of 4 for rotated planar YUV\n"); @@ -1817,7 +1804,7 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state, /* use scaler when colorkey is not required */ if (!plane_state->ckey.flags && intel_fb_scalable(fb)) { min_scale = 1; - max_scale = skl_max_scale(crtc_state, fb->format->format); + max_scale = skl_max_scale(crtc_state, fb->format); } ret = drm_atomic_helper_check_plane_state(&plane_state->base, @@ -2157,6 +2144,13 @@ static const u64 skl_plane_format_modifiers_ccs[] = { DRM_FORMAT_MOD_INVALID }; +static const u64 gen12_plane_format_modifiers_noccs[] = { + I915_FORMAT_MOD_Y_TILED, + I915_FORMAT_MOD_X_TILED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + static bool g4x_sprite_format_mod_supported(struct drm_plane *_plane, u32 format, u64 modifier) { @@ -2305,6 +2299,55 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, } } +static bool gen12_plane_format_mod_supported(struct drm_plane *_plane, + u32 format, u64 modifier) +{ + switch (modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + case I915_FORMAT_MOD_Y_TILED: + break; + default: + return false; + } + + switch (format) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + case DRM_FORMAT_UYVY: + case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: + 
case DRM_FORMAT_P010: + case DRM_FORMAT_P012: + case DRM_FORMAT_P016: + case DRM_FORMAT_XVYU2101010: + case DRM_FORMAT_C8: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_Y210: + case DRM_FORMAT_Y212: + case DRM_FORMAT_Y216: + case DRM_FORMAT_XVYU12_16161616: + case DRM_FORMAT_XVYU16161616: + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == I915_FORMAT_MOD_X_TILED || + modifier == I915_FORMAT_MOD_Y_TILED) + return true; + /* fall through */ + default: + return false; + } +} + static const struct drm_plane_funcs g4x_sprite_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -2341,6 +2384,15 @@ static const struct drm_plane_funcs skl_plane_funcs = { .format_mod_supported = skl_plane_format_mod_supported, }; +static const struct drm_plane_funcs gen12_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = intel_plane_destroy, + .atomic_duplicate_state = intel_plane_duplicate_state, + .atomic_destroy_state = intel_plane_destroy_state, + .format_mod_supported = gen12_plane_format_mod_supported, +}; + static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { @@ -2429,6 +2481,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { + static const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; @@ -2471,11 +2524,19 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, formats = skl_get_plane_formats(dev_priv, pipe, plane_id, &num_formats); - plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); - if (plane->has_ccs) - modifiers = skl_plane_format_modifiers_ccs; - else - modifiers = skl_plane_format_modifiers_noccs; + if (INTEL_GEN(dev_priv) >= 12) { + /* TODO: Implement support for gen-12 CCS modifiers */ + plane->has_ccs = false; + modifiers = gen12_plane_format_modifiers_noccs; + plane_funcs = &gen12_plane_funcs; + } else { + plane->has_ccs = skl_plane_has_ccs(dev_priv, pipe, plane_id); + if (plane->has_ccs) + modifiers = skl_plane_format_modifiers_ccs; + else + modifiers = skl_plane_format_modifiers_noccs; + plane_funcs = &skl_plane_funcs; + } if (plane_id == PLANE_PRIMARY) plane_type = DRM_PLANE_TYPE_PRIMARY; @@ -2485,7 +2546,7 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, possible_crtcs = BIT(pipe); ret = drm_universal_plane_init(&dev_priv->drm, &plane->base, - possible_crtcs, &skl_plane_funcs, + possible_crtcs, plane_funcs, formats, num_formats, modifiers, plane_type, "plane %d%c", plane_id + 1, @@ -2518,6 +2579,8 @@ skl_universal_plane_create(struct drm_i915_private *dev_priv, BIT(DRM_MODE_BLEND_PREMULTI) | BIT(DRM_MODE_BLEND_COVERAGE)); + drm_plane_create_zpos_immutable_property(&plane->base, plane_id); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; @@ -2539,7 +2602,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, const u64 *modifiers; const u32 *formats; int num_formats; - int ret; + int ret, zpos; if (INTEL_GEN(dev_priv) >= 9) return skl_universal_plane_create(dev_priv, pipe, @@ -2629,6 +2692,9 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + zpos = sprite + 1; + 
drm_plane_create_zpos_immutable_property(&plane->base, zpos); + drm_plane_helper_add(&plane->base, &intel_plane_helper_funcs); return plane; diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h index 093a2d156f1e..229336214f68 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.h +++ b/drivers/gpu/drm/i915/display/intel_sprite.h @@ -17,7 +17,6 @@ struct drm_i915_private; struct intel_crtc_state; struct intel_plane_state; -bool is_planar_yuv_format(u32 pixelformat); int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 85743a43bee2..7773169b7331 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -23,32 +23,38 @@ static const char *tc_port_mode_name(enum tc_port_mode mode) return names[mode]; } -static bool has_modular_fia(struct drm_i915_private *i915) -{ - if (!INTEL_INFO(i915)->display.has_modular_fia) - return false; - - return intel_uncore_read(&i915->uncore, - PORT_TX_DFLEXDPSP(FIA1)) & MODULAR_FIA_MASK; -} - -static enum phy_fia tc_port_to_fia(struct drm_i915_private *i915, - enum tc_port tc_port) +static void +tc_port_load_fia_params(struct drm_i915_private *i915, + struct intel_digital_port *dig_port) { - if (!has_modular_fia(i915)) - return FIA1; + enum port port = dig_port->base.port; + enum tc_port tc_port = intel_port_to_tc(i915, port); + u32 modular_fia; + + if (INTEL_INFO(i915)->display.has_modular_fia) { + modular_fia = intel_uncore_read(&i915->uncore, + PORT_TX_DFLEXDPSP(FIA1)); + modular_fia &= MODULAR_FIA_MASK; + } else { + modular_fia = 0; + } /* * Each Modular FIA instance houses 2 TC ports. In SOC that has more * than two TC ports, there are multiple instances of Modular FIA. 
*/ - return tc_port / 2; + if (modular_fia) { + dig_port->tc_phy_fia = tc_port / 2; + dig_port->tc_phy_fia_idx = tc_port % 2; + } else { + dig_port->tc_phy_fia = FIA1; + dig_port->tc_phy_fia_idx = tc_port; + } } u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 lane_mask; @@ -57,8 +63,23 @@ u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port) WARN_ON(lane_mask == 0xffffffff); - return (lane_mask & DP_LANE_ASSIGNMENT_MASK(tc_port)) >> - DP_LANE_ASSIGNMENT_SHIFT(tc_port); + lane_mask &= DP_LANE_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx); + return lane_mask >> DP_LANE_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); +} + +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port) +{ + struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + struct intel_uncore *uncore = &i915->uncore; + u32 pin_mask; + + pin_mask = intel_uncore_read(uncore, + PORT_TX_DFLEXPA1(dig_port->tc_phy_fia)); + + WARN_ON(pin_mask == 0xffffffff); + + return (pin_mask & DP_PIN_ASSIGNMENT_MASK(dig_port->tc_phy_fia_idx)) >> + DP_PIN_ASSIGNMENT_SHIFT(dig_port->tc_phy_fia_idx); } int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port) @@ -95,7 +116,6 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, int required_lanes) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); bool lane_reversal = dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -104,19 +124,21 @@ void intel_tc_port_set_fia_lane_count(struct intel_digital_port *dig_port, val = intel_uncore_read(uncore, PORT_TX_DFLEXDPMLE1(dig_port->tc_phy_fia)); - val &= ~DFLEXDPMLE1_DPMLETC_MASK(tc_port); + val &= ~DFLEXDPMLE1_DPMLETC_MASK(dig_port->tc_phy_fia_idx); switch (required_lanes) { case 1: - val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3(tc_port) : - DFLEXDPMLE1_DPMLETC_ML0(tc_port); + val |= lane_reversal ? + DFLEXDPMLE1_DPMLETC_ML3(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML0(dig_port->tc_phy_fia_idx); break; case 2: - val |= lane_reversal ? DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) : - DFLEXDPMLE1_DPMLETC_ML1_0(tc_port); + val |= lane_reversal ? 
+ DFLEXDPMLE1_DPMLETC_ML3_2(dig_port->tc_phy_fia_idx) : + DFLEXDPMLE1_DPMLETC_ML1_0(dig_port->tc_phy_fia_idx); break; case 4: - val |= DFLEXDPMLE1_DPMLETC_ML3_0(tc_port); + val |= DFLEXDPMLE1_DPMLETC_ML3_0(dig_port->tc_phy_fia_idx); break; default: MISSING_CASE(required_lanes); @@ -164,9 +186,9 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) return mask; } - if (val & TC_LIVE_STATE_TBT(tc_port)) + if (val & TC_LIVE_STATE_TBT(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_TBT_ALT); - if (val & TC_LIVE_STATE_TC(tc_port)) + if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_DP_ALT); if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) @@ -182,7 +204,6 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -194,14 +215,13 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) return false; } - return val & DP_PHY_MODE_STATUS_COMPLETED(tc_port); + return val & DP_PHY_MODE_STATUS_COMPLETED(dig_port->tc_phy_fia_idx); } static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, bool enable) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -215,9 +235,9 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, return false; } - val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); + val &= ~DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); if (!enable) - val |= DP_PHY_MODE_STATUS_NOT_SAFE(tc_port); + val |= DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx); intel_uncore_write(uncore, PORT_TX_DFLEXDPCSSS(dig_port->tc_phy_fia), val); @@ -232,7 +252,6 @@ static bool icl_tc_phy_set_safe_mode(struct intel_digital_port *dig_port, static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; @@ -244,7 +263,7 @@ static bool icl_tc_phy_is_in_safe_mode(struct intel_digital_port *dig_port) return true; } - return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(tc_port)); + return !(val & DP_PHY_MODE_STATUS_NOT_SAFE(dig_port->tc_phy_fia_idx)); } /* @@ -540,5 +559,5 @@ void intel_tc_port_init(struct intel_digital_port *dig_port, bool is_legacy) mutex_init(&dig_port->tc_lock); dig_port->tc_legacy_port = is_legacy; dig_port->tc_link_refcount = 0; - dig_port->tc_phy_fia = tc_port_to_fia(i915, tc_port); + tc_port_load_fia_params(i915, dig_port); } diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 783d75531435..463f1b3c836f 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -13,6 +13,7 @@ struct intel_digital_port; bool intel_tc_port_connected(struct intel_digital_port *dig_port); u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); +u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); void intel_tc_port_set_fia_lane_count(struct intel_digital_port 
*dig_port, int required_lanes); diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index b70221f5112a..70726b481244 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -961,11 +961,10 @@ intel_tv_mode_valid(struct drm_connector *connector, return MODE_CLOCK_HIGH; /* Ensure TV refresh is close to desired refresh */ - if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) - < 1000) - return MODE_OK; + if (abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000) >= 1000) + return MODE_CLOCK_RANGE; - return MODE_CLOCK_RANGE; + return MODE_OK; } static int @@ -1948,9 +1947,8 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_encoder->type = INTEL_OUTPUT_TVOUT; intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; - intel_encoder->crtc_mask = (1 << 0) | (1 << 1); + intel_encoder->crtc_mask = BIT(PIPE_A) | BIT(PIPE_B); intel_encoder->cloneable = 0; - intel_encoder->base.possible_crtcs = ((1 << 0) | (1 << 1)); intel_tv->type = DRM_MODE_CONNECTOR_Unknown; /* BIOS margin values */ diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index dfcd156b5094..e3045ced4bfe 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -291,6 +291,8 @@ struct bdb_general_features { #define DVO_PORT_HDMIE 12 /* 193 */ #define DVO_PORT_DPF 13 /* N/A */ #define DVO_PORT_HDMIF 14 /* N/A */ +#define DVO_PORT_DPG 15 +#define DVO_PORT_HDMIG 16 #define DVO_PORT_MIPIA 21 /* 171 */ #define DVO_PORT_MIPIB 22 /* 171 */ #define DVO_PORT_MIPIC 23 /* 171 */ @@ -325,6 +327,7 @@ enum vbt_gmbus_ddi { #define DP_AUX_D 0x30 #define DP_AUX_E 0x50 #define DP_AUX_F 0x60 +#define DP_AUX_G 0x70 #define VBT_DP_MAX_LINK_RATE_HBR3 0 #define VBT_DP_MAX_LINK_RATE_HBR2 1 diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c new file mode 100644 index 000000000000..2ff7293986d4 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/pci.h> +#include <linux/vgaarb.h> + +#include <drm/i915_drm.h> + +#include "i915_drv.h" +#include "intel_vga.h" + +static i915_reg_t intel_vga_cntrl_reg(struct drm_i915_private *i915) +{ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + return VLV_VGACNTRL; + else if (INTEL_GEN(i915) >= 5) + return CPU_VGACNTRL; + else + return VGACNTRL; +} + +/* Disable the VGA plane that we never use */ +void intel_vga_disable(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + u8 sr1; + + /* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(SR01, VGA_SR_INDEX); + sr1 = inb(VGA_SR_DATA); + outb(sr1 | 1 << 5, VGA_SR_DATA); + vga_put(pdev, VGA_RSRC_LEGACY_IO); + udelay(300); + + I915_WRITE(vga_reg, VGA_DISP_DISABLE); + POSTING_READ(vga_reg); +} + +void intel_vga_redisable_power_on(struct drm_i915_private *dev_priv) +{ + i915_reg_t vga_reg = intel_vga_cntrl_reg(dev_priv); + + if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + intel_vga_disable(dev_priv); + } +} + +void intel_vga_redisable(struct drm_i915_private *i915) +{ + intel_wakeref_t wakeref; + + /* + * This function can be called both from 
intel_modeset_setup_hw_state or + * at a very early point in our resume sequence, where the power well + * structures are not yet restored. Since this function is at a very + * paranoid "someone might have enabled VGA while we were not looking" + * level, just check if the power well is enabled instead of trying to + * follow the "don't touch the power well if we don't need it" policy + * the rest of the driver uses. + */ + wakeref = intel_display_power_get_if_enabled(i915, POWER_DOMAIN_VGA); + if (!wakeref) + return; + + intel_vga_redisable_power_on(i915); + + intel_display_power_put(i915, POWER_DOMAIN_VGA, wakeref); +} + +void intel_vga_reset_io_mem(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* + * After we re-enable the power well, if we touch VGA register 0x3d5 + * we'll get unclaimed register interrupts. This stops after we write + * anything to the VGA MSR register. The vgacon module uses this + * register all the time, so if we unbind our driver and, as a + * consequence, bind vgacon, we'll get stuck in an infinite loop at + * console_unlock(). So make here we touch the VGA MSR register, making + * sure vgacon can keep working normally without triggering interrupts + * and error messages. + */ + vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); + outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); + vga_put(pdev, VGA_RSRC_LEGACY_IO); +} + +static int +intel_vga_set_state(struct drm_i915_private *i915, bool enable_decode) +{ + unsigned int reg = INTEL_GEN(i915) >= 6 ? SNB_GMCH_CTRL : INTEL_GMCH_CTRL; + u16 gmch_ctrl; + + if (pci_read_config_word(i915->bridge_dev, reg, &gmch_ctrl)) { + DRM_ERROR("failed to read control word\n"); + return -EIO; + } + + if (!!(gmch_ctrl & INTEL_GMCH_VGA_DISABLE) == !enable_decode) + return 0; + + if (enable_decode) + gmch_ctrl &= ~INTEL_GMCH_VGA_DISABLE; + else + gmch_ctrl |= INTEL_GMCH_VGA_DISABLE; + + if (pci_write_config_word(i915->bridge_dev, reg, gmch_ctrl)) { + DRM_ERROR("failed to write control word\n"); + return -EIO; + } + + return 0; +} + +static unsigned int +intel_vga_set_decode(void *cookie, bool enable_decode) +{ + struct drm_i915_private *i915 = cookie; + + intel_vga_set_state(i915, enable_decode); + + if (enable_decode) + return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + else + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; +} + +int intel_vga_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + int ret; + + /* + * If we have > 1 VGA cards, then we need to arbitrate access to the + * common VGA resources. + * + * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), + * then we do not take part in VGA arbitration and the + * vga_client_register() fails with -ENODEV. 
+ */ + ret = vga_client_register(pdev, i915, NULL, intel_vga_set_decode); + if (ret && ret != -ENODEV) + return ret; + + return 0; +} + +void intel_vga_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_client_register(pdev, NULL, NULL, NULL); +} diff --git a/drivers/gpu/drm/i915/display/intel_vga.h b/drivers/gpu/drm/i915/display/intel_vga.h new file mode 100644 index 000000000000..ba5b55b917f0 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_vga.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_VGA_H__ +#define __INTEL_VGA_H__ + +struct drm_i915_private; + +void intel_vga_reset_io_mem(struct drm_i915_private *i915); +void intel_vga_disable(struct drm_i915_private *i915); +void intel_vga_redisable(struct drm_i915_private *i915); +void intel_vga_redisable_power_on(struct drm_i915_private *i915); +int intel_vga_register(struct drm_i915_private *i915); +void intel_vga_unregister(struct drm_i915_private *i915); + +#endif /* __INTEL_VGA_H__ */ diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index a71b22bdd95b..50064cde0724 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -749,7 +749,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + enum pipe pipe = intel_crtc->pipe; enum port port; u32 val; bool glk_cold_boot = false; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index f99920652751..81366aa4812b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->engine->i915; struct drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; @@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); if (IS_ERR(batch)) { @@ -211,7 +208,7 @@ static void clear_pages_worker(struct work_struct *work) * keep track of the GPU activity within this vma/request, and * propagate the signal from the request to w->dma. 
*/ - err = i915_active_ref(&vma->active, rq->timeline, rq); + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; @@ -229,8 +226,6 @@ out_batch: intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1cdfe05514c3..7b01f4605f21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -167,97 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 12) - max = GEN12_MAX_CONTEXT_HW_ID; - else if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate proxy submission. - */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. 
- */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { @@ -294,27 +203,33 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) for_each_engine(engine, gt, id) { struct intel_context *ce; + if (engine->legacy_idx == INVALID_ENGINE) + continue; + + GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES); + GEM_BUG_ON(e->engines[engine->legacy_idx]); + ce = intel_context_create(ctx, engine); if (IS_ERR(ce)) { - __free_engines(e, id); + __free_engines(e, e->num_engines + 1); return ERR_CAST(ce); } - e->engines[id] = ce; - e->num_engines = id + 1; + e->engines[engine->legacy_idx] = ce; + e->num_engines = max(e->num_engines, engine->legacy_idx); } + e->num_engines++; return e; } static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -325,70 +240,55 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) kfree(ctx->name); put_pid(ctx->pid); - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } -static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, 
&i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } static void context_close(struct i915_gem_context *ctx) { - mutex_lock(&ctx->mutex); + struct i915_address_space *vm; i915_gem_context_set_closed(ctx); - ctx->file_priv = ERR_PTR(-EBADF); - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); + mutex_lock(&ctx->mutex); + + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + + ctx->file_priv = ERR_PTR(-EBADF); /* * The LUT uses the VMA as a backpointer to unref the object, @@ -414,7 +314,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -428,7 +327,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -441,6 +339,10 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -470,11 +372,11 @@ static void __apply_ppgtt(struct intel_context *ce, void *vm) static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -483,12 +385,12 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); } static void __set_timeline(struct intel_timeline **dst, @@ -515,27 +417,25 @@ static void __assign_timeline(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(i915); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -543,14 +443,17 @@ i915_gem_create_context(struct 
drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -582,18 +485,11 @@ struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - int err; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - i915_gem_context_clear_bannable(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); @@ -602,62 +498,41 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; } -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); - - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +int i915_gem_init_contexts(struct drm_i915_private *i915) { struct i915_gem_context *ctx; /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(i915->kernel_context); - init_contexts(dev_priv); + init_contexts(&i915->gem.contexts); /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). - */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); - dev_priv->kernel_context = ctx; + i915->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? 
"logical" : "fake"); return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); } static int context_idr_cleanup(int id, void *p, void *data) @@ -675,11 +550,16 @@ static int vm_idr_cleanup(int id, void *p, void *data) static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", @@ -716,9 +596,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, idr_init(&file_priv->context_idr); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -746,6 +624,7 @@ err: void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); idr_destroy(&file_priv->context_idr); @@ -754,6 +633,8 @@ void i915_gem_context_close(struct drm_file *file) idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -846,6 +727,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); @@ -865,20 +747,18 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, NULL, cb_retire); + i915_active_init(&cb->base, NULL, cb_retire); err = i915_active_acquire(&cb->base); if (err) { kfree(cb); @@ -910,7 +790,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (emit) err = emit(rq, data); if (err == 0) - err = i915_active_ref(&cb->base, rq->timeline, rq); + err = i915_active_add_request(&cb->base, rq); i915_request_add(rq); if (err) @@ -933,16 +813,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? 
*/ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - + rcu_read_lock(); vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -953,7 +829,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -973,7 +849,7 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) @@ -1003,12 +879,18 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) intel_ring_advance(rq, cs); } else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + int err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES); + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; for (i = GEN8_3LVL_PDPES; i--; ) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); @@ -1045,34 +927,34 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) return -ENOENT; - err = mutex_lock_interruptible(&file_priv->vm_idr_lock); - if (err) - return err; - + rcu_read_lock(); vm = idr_find(&file_priv->vm_idr, args->value); - if (vm) - i915_vm_get(vm); - mutex_unlock(&file_priv->vm_idr_lock); + if (vm && !kref_get_unless_zero(&vm->ref)) + vm = NULL; + rcu_read_unlock(); if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto out; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. 
*/ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -1087,13 +969,12 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - i915_vm_put(__set_ppgtt(ctx, old)); - i915_vm_put(old); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1112,7 +993,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE + - (CTX_R_PWR_CLK_STATE + 1) * 4; + CTX_R_PWR_CLK_STATE * 4; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); @@ -1155,8 +1036,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; @@ -1180,23 +1060,6 @@ unlock: } static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->engine->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - -static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, struct intel_sseu *context) @@ -1967,10 +1830,11 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; rcu_read_lock(); do { - vm = READ_ONCE(src->vm); + vm = rcu_dereference(src->vm); if (!vm) break; @@ -1992,7 +1856,7 @@ static int clone_vm(struct i915_gem_context *dst, * it cannot be reallocated elsewhere. 
*/ - if (vm == READ_ONCE(src->vm)) + if (vm == rcu_access_pointer(src->vm)) break; i915_vm_put(vm); @@ -2000,11 +1864,16 @@ static int clone_vm(struct i915_gem_context *dst, rcu_read_unlock(); if (vm) { - __assign_ppgtt(dst, vm); + if (!mutex_lock_interruptible(&dst->mutex)) { + __assign_ppgtt(dst, vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; + } i915_vm_put(vm); } - return 0; + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -2094,12 +1963,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2226,12 +2090,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; - else if (to_i915(dev)->ggtt.alias) - args->value = to_i915(dev)->ggtt.alias->vm.total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2297,7 +2161,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2319,7 +2183,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; @@ -2332,33 +2196,6 @@ out: return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 176978608b6f..cfe80590f0ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,7 +11,9 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" +#include "i915_gem_gtt.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -112,19 +114,22 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) +static inline bool +i915_gem_context_nopreempt(const struct i915_gem_context *ctx) { - if 
(atomic_inc_not_zero(&ctx->hw_id_pin_count)) - return 0; + return test_bit(CONTEXT_NOPREEMPT, &ctx->flags); +} - return __i915_gem_context_pin_hw_id(ctx); +static inline void +i915_gem_context_set_nopreempt(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_NOPREEMPT, &ctx->flags); } -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) +static inline void +i915_gem_context_clear_nopreempt(struct i915_gem_context *ctx) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); + clear_bit(CONTEXT_NOPREEMPT, &ctx->flags); } static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) @@ -133,8 +138,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) } /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +int __must_check i915_gem_init_contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); @@ -173,6 +178,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 260d59cc3de8..fe97b8ba4fda 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator @@ -146,24 +146,7 @@ struct i915_gem_context { #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 - - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. 
- */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; +#define CONTEXT_NOPREEMPT 4 struct mutex mutex; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 9c58e8fac1d9..9937b4c341f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -27,7 +27,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) { - if (!READ_ONCE(obj->pin_global)) + if (!i915_gem_object_is_framebuffer(obj)) return; i915_gem_object_lock(obj); @@ -288,14 +288,21 @@ restart: if (!drm_mm_node_allocated(&vma->node)) continue; - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); + /* Wait for an earlier async bind, need to rewrite it */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); if (ret) return ret; } } - list_for_each_entry(vma, &obj->vma.list, obj_link) - vma->node.color = cache_level; + list_for_each_entry(vma, &obj->vma.list, obj_link) { + if (i915_vm_has_cache_coloring(vma->vm)) + vma->node.color = cache_level; + } i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -389,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - ret = i915_gem_object_lock_interruptible(obj); if (ret == 0) { ret = i915_gem_object_set_cache_level(obj, level); i915_gem_object_unlock(obj); } - mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -422,12 +424,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, assert_object_held(obj); - /* Mark the global pin early so that we account for the - * display coherency whilst setting up the cache domains. - */ - obj->pin_global++; - - /* The display engine is not coherent with the LLC cache on gen6. As + /* + * The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. @@ -439,12 +437,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, ret = i915_gem_object_set_cache_level(obj, HAS_WT(to_i915(obj->base.dev)) ? I915_CACHE_WT : I915_CACHE_NONE); - if (ret) { - vma = ERR_PTR(ret); - goto err_unpin_global; - } + if (ret) + return ERR_PTR(ret); - /* As the user may map the buffer once pinned in the display plane + /* + * As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. However, * it may simply be too big to fit into mappable, in which case @@ -461,22 +458,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); if (IS_ERR(vma)) - goto err_unpin_global; + return vma; vma->display_alignment = max_t(u64, vma->display_alignment, alignment); __i915_gem_object_flush_for_display(obj); - /* It should now be out of any other write domains, and we can update + /* + * It should now be out of any other write domains, and we can update * the domain values for our changes. 
*/ obj->read_domains |= I915_GEM_DOMAIN_GTT; return vma; - -err_unpin_global: - obj->pin_global--; - return vma; } static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) @@ -491,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } mutex_unlock(&i915->ggtt.vm.mutex); @@ -500,7 +495,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) spin_lock_irqsave(&i915->mm.obj_lock, flags); - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED && + !atomic_read(&obj->mm.shrink_pin)) list_move_tail(&obj->mm.link, &i915->mm.shrink_list); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); @@ -514,12 +510,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) assert_object_held(obj); - if (WARN_ON(obj->pin_global == 0)) - return; - - if (--obj->pin_global == 0) - vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - /* Bump the LRU to try and avoid premature eviction whilst flipping */ i915_gem_object_bump_inactive_ggtt(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 493f07806b08..e96901888323 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -252,6 +252,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct intel_context *ce; struct i915_request *rq; u32 *rq_cmd; unsigned int rq_size; @@ -697,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ + mutex_lock(&eb->context->vm->mutex); err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -725,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; eb->context_flags = 0; @@ -880,6 +883,9 @@ static void eb_destroy(const struct i915_execbuffer *eb) { GEM_BUG_ON(eb->reloc_cache.rq); + if (eb->reloc_cache.ce) + intel_context_put(eb->reloc_cache.ce); + if (eb->lut_size > 0) kfree(eb->buckets); } @@ -903,6 +909,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->has_fence = cache->gen < 4; cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; + cache->ce = NULL; cache->rq = NULL; cache->rq_size = 0; } @@ -967,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache) ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1042,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { @@ -1145,7 +1156,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, u32 *cmd; int err; - pool = intel_engine_pool_get(&eb->engine->pool, PAGE_SIZE); + 
pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1168,7 +1179,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + rq = intel_context_create_request(cache->ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1239,6 +1250,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); + if (!cache->ce) { + struct intel_context *ce; + + /* + * The CS pre-parser can pre-fetch commands across + * memory sync points and starting gen12 it is able to + * pre-fetch across BB_START and BB_END boundaries + * (within the same context). We therefore use a + * separate context gen12+ to guarantee that the reloc + * writes land before the parser gets to the target + * memory location. + */ + if (cache->gen >= 12) + ce = intel_context_create(eb->context->gem_context, + eb->engine); + else + ce = intel_context_get(eb->context); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + cache->ce = ce; + } + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -1388,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to bind target VMA!")) return err; @@ -1961,7 +1995,7 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) struct i915_vma *vma; int err; - pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len); + pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); @@ -2043,6 +2077,9 @@ static int eb_submit(struct i915_execbuffer *eb) if (err) return err; + if (i915_gem_context_nopreempt(eb->gem_context)) + eb->request->flags |= I915_REQUEST_NOPREEMPT; + return 0; } @@ -2112,35 +2149,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int -__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - if (likely(atomic_inc_not_zero(&ce->pin_count))) - return 0; - - err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); - if (err) - return err; - - err = __intel_context_do_pin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); - - return err; -} - -static void -__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) - return; - - mutex_lock(&eb->i915->drm.struct_mutex); - intel_context_unpin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); -} - static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_timeline *tl; @@ -2160,7 +2168,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ - err = __eb_pin_context(eb, ce); + err = intel_context_pin(ce); if (err) return err; @@ -2204,7 +2212,7 @@ err_exit: intel_context_exit(ce); intel_context_timeline_unlock(tl); err_unpin: - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); return err; } @@ -2217,7 +2225,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); } static unsigned int diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index 0c41e04ab8fa..5ae694c24df4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -117,13 +117,6 @@ create_st: goto err; } - /* Mark the pages as dontneed whilst they are still pinned. As soon - * as they are unpinned they are allowed to be reaped by the shrinker, - * and the caller is expected to repopulate - the contents of this - * object are only valid whilst active and pinned. - */ - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -143,7 +136,6 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, internal_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { @@ -188,6 +180,15 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &i915_gem_object_internal_ops); + /* + * Mark the object as volatile, such that the pages are marked as + * dontneed whilst they are still pinned. As soon as they are unpinned + * they are allowed to be reaped by the shrinker, and the caller is + * expected to repopulate - the contents of this object are only valid + * whilst active and pinned. + */ + i915_gem_object_set_volatile(obj); + obj->read_domains = I915_GEM_DOMAIN_CPU; obj->write_domain = I915_GEM_DOMAIN_CPU; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 261c9bd83f51..fd4122d8c0a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -8,6 +8,7 @@ #include <linux/sizes.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_gem_gtt.h" @@ -245,21 +246,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) wakeref = intel_runtime_pm_get(rpm); - srcu = intel_gt_reset_trylock(ggtt->vm.gt); - if (srcu < 0) { - ret = srcu; - goto err_rpm; - } - - ret = i915_mutex_lock_interruptible(dev); + ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu); if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } + goto err_rpm; /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -287,10 +276,19 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) view.type = I915_GGTT_VIEW_PARTIAL; vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); } + + /* The entire mappable GGTT is pinned? Unexpected! */ + GEM_BUG_ON(vma == ERR_PTR(-ENOSPC)); } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. 
*/ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -318,14 +316,16 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)); - i915_vma_set_ggtt_write(vma); + if (write) { + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + i915_vma_set_ggtt_write(vma); + obj->mm.dirty = true; + } err_fence: i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: @@ -333,23 +333,20 @@ err_rpm: i915_gem_object_unpin_pages(obj); err: switch (ret) { - case -EIO: - /* - * We eat errors when the gpu is terminally wedged to avoid - * userspace unduly crashing (gl has no provisions for mmaps to - * fail). But any other -EIO isn't ours (e.g. swap in failure) - * and so needs to be reported. - */ - if (!intel_gt_is_wedged(ggtt->vm.gt)) - return VM_FAULT_SIGBUS; - /* else, fall through */ - case -EAGAIN: - /* - * EAGAIN means the gpu is hung and we'll wait for the error - * handler to reset everything when re-faulting in - * i915_mutex_lock_interruptible. - */ + default: + WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); + /* fallthrough */ + case -EIO: /* shmemfs failure from swap device */ + case -EFAULT: /* purged object */ + case -ENODEV: /* bad object, how did you get here! */ + return VM_FAULT_SIGBUS; + + case -ENOSPC: /* shmemfs allocation failure */ + case -ENOMEM: /* our allocation failure */ + return VM_FAULT_OOM; + case 0: + case -EAGAIN: case -ERESTARTSYS: case -EINTR: case -EBUSY: @@ -358,14 +355,6 @@ err: * already did the job. */ return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - case -ENOSPC: - case -EFAULT: - return VM_FAULT_SIGBUS; - default: - WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret); - return VM_FAULT_SIGBUS; } } @@ -436,6 +425,7 @@ out: static int create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = &i915->gt; int err; err = drm_gem_create_mmap_offset(&obj->base); @@ -443,21 +433,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (err) - break; - - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; + err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; - } while (flush_delayed_work(&i915->gem.retire_work)); - - return err; + i915_gem_drain_freed_objects(i915); + return drm_gem_create_mmap_offset(&obj->base); } int @@ -473,10 +454,16 @@ i915_gem_mmap_gtt(struct drm_file *file, if (!obj) return -ENOENT; + if (i915_gem_object_never_bind_ggtt(obj)) { + ret = -ENODEV; + goto out; + } + ret = create_mmap_offset(obj); if (ret == 0) *offset = drm_vma_node_offset_addr(&obj->base.vma_node); +out: i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index d7855dc5a5c5..dbf9be9a79f4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = 
intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; - trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs insits it *might*. + * For the moment, play along. + */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + i915_vma_destroy(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - - mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 5efb9936e05b..85921796851f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -25,10 +25,11 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj); void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops); struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size); +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915, - const void *data, size_t size); + const void *data, resource_size_t size); extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops; void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, @@ -106,6 +107,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) dma_resv_lock(obj->base.resv, NULL); } +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); +} + static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { @@ -135,27 +141,58 @@ i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) } static inline bool +i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_CONTIGUOUS; +} + +static inline bool +i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj) +{ + return obj->flags & I915_BO_ALLOC_VOLATILE; +} + +static inline void +i915_gem_object_set_volatile(struct drm_i915_gem_object *obj) +{ + obj->flags |= I915_BO_ALLOC_VOLATILE; +} + +static inline bool +i915_gem_object_type_has(const struct drm_i915_gem_object *obj, + unsigned long flags) +{ + return obj->ops->flags & flags; +} + +static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_HAS_STRUCT_PAGE); } static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); } static inline bool i915_gem_object_is_proxy(const struct 
drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY); +} + +static inline bool +i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj) +{ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_GGTT); } static inline bool i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) { - return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL); } static inline bool @@ -406,7 +443,8 @@ static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) return true; - return obj->pin_global; /* currently in use by HW, keep flushed */ + /* Currently in use by HW (display engine)? Keep flushed. */ + return i915_gem_object_is_framebuffer(obj); } static inline void __start_cpu_write(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 6415f9a17e2d..5bd8de124d74 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -32,7 +32,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(vma->size, block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -216,7 +216,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(dst->size, block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_pool_get(&ce->engine->pool, size); + pool = intel_engine_get_pool(ce->engine, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index ede0eb4218a8..a387e3ee728b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,6 +8,7 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include <drm/drm_gem.h> +#include <uapi/drm/i915_drm.h> #include "i915_active.h" #include "i915_selftest.h" @@ -32,7 +33,8 @@ struct drm_i915_gem_object_ops { #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1) #define I915_GEM_OBJECT_IS_PROXY BIT(2) -#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3) +#define I915_GEM_OBJECT_NO_GGTT BIT(3) +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4) /* Interface between the GEM object and its backing storage. 
* get_pages() is called once prior to the use of the associated set @@ -117,6 +119,11 @@ struct drm_i915_gem_object { I915_SELFTEST_DECLARE(struct list_head st_link); + unsigned long flags; +#define I915_BO_ALLOC_CONTIGUOUS BIT(0) +#define I915_BO_ALLOC_VOLATILE BIT(1) +#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) + /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit @@ -152,17 +159,30 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - /** Count of how many global VMA are currently pinned for use by HW */ - unsigned int pin_global; struct { struct mutex lock; /* protects the pages and their use */ atomic_t pages_pin_count; + atomic_t shrink_pin; + + /** + * Memory region for this object. + */ + struct intel_memory_region *region; + /** + * List of memory region blocks allocated for this object. + */ + struct list_head blocks; + /** + * Element within memory_region->objects or region->purgeable + * if the object is marked as DONTNEED. Access is protected by + * region->obj_lock. + */ + struct list_head region_link; struct sg_table *pages; void *mapping; - /* TODO: whack some of this into the error state */ struct i915_page_sizes { /** * The sg mask of the pages sg_table. i.e the mask of diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 18f0ce0135c1..b0ec0959c13f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -18,6 +18,9 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->mm.lock); + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_DONTNEED; + /* Make the pages coherent with the GPU (flushing any swapin). 
*/ if (obj->cache_dirty) { obj->write_domain = 0; @@ -71,6 +74,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, list = &i915->mm.shrink_list; list_add_tail(&obj->mm.link, list); + atomic_set(&obj->mm.shrink_pin, 0); spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } } @@ -159,6 +163,9 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) if (IS_ERR_OR_NULL(pages)) return pages; + if (i915_gem_object_is_volatile(obj)) + obj->mm.madv = I915_MADV_WILLNEED; + i915_gem_object_make_unshrinkable(obj); if (obj->mm.mapping) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 768356908160..8043ff63d73f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -16,6 +16,7 @@ #include "gt/intel_gt.h" #include "i915_drv.h" #include "i915_gem_object.h" +#include "i915_gem_region.h" #include "i915_scatterlist.h" static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) @@ -191,8 +192,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) /* Perma-pin (until release) the physical set of pages */ __i915_gem_object_pin_pages(obj); - if (!IS_ERR_OR_NULL(pages)) + if (!IS_ERR_OR_NULL(pages)) { i915_gem_shmem_ops.put_pages(obj, pages); + i915_gem_object_release_memory_region(obj); + } mutex_unlock(&obj->mm.lock); return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 92e53c25424c..7987b54fb1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -7,79 +7,9 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = - container_of((struct list_head *)node, - typeof(*active), link); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); - } -} - -static void i915_gem_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - lockdep_assert_held(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) - call_idle_barriers(engine); /* cleanup after wedging */ - - i915_vma_parked(i915); - - i915_globals_park(); -} - -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = (!intel_wakeref_is_active(&i915->gt.wakeref) && - !work_pending(work)); - intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - /* Come back later if the device is busy... 
*/ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} static int pm_notifier(struct notifier_block *nb, unsigned long action, @@ -90,14 +20,10 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: - i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); break; case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); + i915_vma_parked(i915); break; } @@ -108,26 +34,21 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - result = false; + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); } - } while (i915_retire_requests(gt->i915) && result); + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + result = false; + } if (intel_gt_pm_wait_for_idle(gt)) result = false; @@ -140,6 +61,24 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) return switch_to_kernel_context_sync(&i915->gt); } +static void user_forcewake(struct intel_gt *gt, bool suspend) +{ + int count = atomic_read(>->user_wakeref); + + /* Inside suspend/resume so single threaded, no races to worry about. */ + if (likely(!count)) + return; + + intel_gt_pm_get(gt); + if (suspend) { + GEM_BUG_ON(count > atomic_read(>->wakeref.count)); + atomic_sub(count, >->wakeref.count); + } else { + atomic_add(count, >->wakeref.count); + } + intel_gt_pm_put(gt); +} + void i915_gem_suspend(struct drm_i915_private *i915) { GEM_TRACE("\n"); @@ -147,7 +86,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); - mutex_lock(&i915->drm.struct_mutex); + user_forcewake(&i915->gt, true); /* * We have to flush all the executing contexts to main memory so @@ -158,15 +97,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. 
*/ - switch_to_kernel_context_sync(&i915->gt); - - mutex_unlock(&i915->drm.struct_mutex); + intel_gt_suspend(&i915->gt); + intel_uc_suspend(&i915->gt.uc); cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(&i915->gt.uc); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -238,13 +174,9 @@ void i915_gem_resume(struct drm_i915_private *i915) { GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - if (i915_gem_init_hw(i915)) + if (intel_gt_init_hw(&i915->gt)) goto err_wedged; /* @@ -261,9 +193,10 @@ void i915_gem_resume(struct drm_i915_private *i915) if (!i915_gem_load_power_context(i915)) goto err_wedged; + user_forcewake(&i915->gt, false); + out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); return; err_wedged: @@ -277,9 +210,6 @@ err_wedged: void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_WORK(&i915->gem.idle_work, idle_work_handler); - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - i915->gem.pm_notifier.notifier_call = pm_notifier; blocking_notifier_chain_register(&i915->gt.pm_notifications, &i915->gem.pm_notifier); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c new file mode 100644 index 000000000000..2f7bcfb9c964 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_gem_region.h" +#include "i915_drv.h" +#include "i915_trace.h" + +void +i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __intel_memory_region_put_pages_buddy(obj->mm.region, &obj->mm.blocks); + + obj->mm.dirty = false; + sg_free_table(pages); + kfree(pages); +} + +int +i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + struct list_head *blocks = &obj->mm.blocks; + resource_size_t size = obj->base.size; + resource_size_t prev_end; + struct i915_buddy_block *block; + unsigned int flags; + struct sg_table *st; + struct scatterlist *sg; + unsigned int sg_page_sizes; + int ret; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + flags = I915_ALLOC_MIN_PAGE_SIZE; + if (obj->flags & I915_BO_ALLOC_CONTIGUOUS) + flags |= I915_ALLOC_CONTIGUOUS; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, blocks); + if (ret) + goto err_free_sg; + + GEM_BUG_ON(list_empty(blocks)); + + sg = st->sgl; + st->nents = 0; + sg_page_sizes = 0; + prev_end = (resource_size_t)-1; + + list_for_each_entry(block, blocks, link) { + u64 block_size, offset; + + block_size = min_t(u64, size, + i915_buddy_block_size(&mem->mm, block)); + offset = i915_buddy_block_offset(block); + + GEM_BUG_ON(overflows_type(block_size, sg->length)); + + if (offset != prev_end || + add_overflows_t(typeof(sg->length), sg->length, block_size)) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = block_size; + + sg->length = block_size; + + st->nents++; + } else { + sg->length += block_size; + 
sg_dma_len(sg) += block_size; + } + + prev_end = offset + block_size; + }; + + sg_page_sizes |= sg->length; + sg_mark_end(sg); + i915_sg_trim(st); + + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; + +err_free_sg: + sg_free_table(st); + kfree(st); + return ret; +} + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags) +{ + INIT_LIST_HEAD(&obj->mm.blocks); + obj->mm.region = intel_memory_region_get(mem); + obj->flags |= flags; + + mutex_lock(&mem->objects.lock); + + if (obj->flags & I915_BO_ALLOC_VOLATILE) + list_add(&obj->mm.region_link, &mem->objects.purgeable); + else + list_add(&obj->mm.region_link, &mem->objects.list); + + mutex_unlock(&mem->objects.lock); +} + +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj) +{ + struct intel_memory_region *mem = obj->mm.region; + + mutex_lock(&mem->objects.lock); + list_del(&obj->mm.region_link); + mutex_unlock(&mem->objects.lock); + + intel_memory_region_put(mem); +} + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + + /* + * NB: Our use of resource_size_t for the size stems from using struct + * resource for the mem->region. We might need to revisit this in the + * future. + */ + + GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS); + + if (!mem) + return ERR_PTR(-ENODEV); + + size = round_up(size, mem->min_page_size); + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT)); + + /* + * XXX: There is a prevalence of the assumption that we fit the + * object's page count inside a 32bit _signed_ variable. Let's document + * this and catch if we ever need to fix it. In the meantime, if you do + * spot such a local variable, please consider fixing! 
+ */ + + if (size >> PAGE_SHIFT > INT_MAX) + return ERR_PTR(-E2BIG); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = mem->ops->create_object(mem, size, flags); + if (!IS_ERR(obj)) + trace_i915_gem_object_create(obj); + + return obj; +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h new file mode 100644 index 000000000000..f2ff6f8bff74 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_REGION_H__ +#define __I915_GEM_REGION_H__ + +#include <linux/types.h> + +struct intel_memory_region; +struct drm_i915_gem_object; +struct sg_table; + +int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj); +void i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, + struct sg_table *pages); + +void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj, + struct intel_memory_region *mem, + unsigned long flags); +void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj); + +struct drm_i915_gem_object * +i915_gem_object_create_region(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); + +#endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4c4954e8ce0a..be68b76e13b3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -7,7 +7,9 @@ #include <linux/pagevec.h> #include <linux/swap.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" +#include "i915_gemfs.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_trace.h" @@ -26,6 +28,7 @@ static void check_release_pagevec(struct pagevec *pvec) static int shmem_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_memory_region *mem = obj->mm.region; const unsigned long page_count = obj->base.size / PAGE_SIZE; unsigned long i; struct address_space *mapping; @@ -52,7 +55,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * If there's no chance of allocating enough pages for the whole * object, bail early. 
*/ - if (page_count > totalram_pages()) + if (obj->base.size > resource_size(&mem->region)) return -ENOMEM; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -417,6 +420,8 @@ shmem_pwrite(struct drm_i915_gem_object *obj, static void shmem_release(struct drm_i915_gem_object *obj) { + i915_gem_object_release_memory_region(obj); + fput(obj->base.filp); } @@ -434,9 +439,9 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = { .release = shmem_release, }; -static int create_shmem(struct drm_i915_private *i915, - struct drm_gem_object *obj, - size_t size) +static int __create_shmem(struct drm_i915_private *i915, + struct drm_gem_object *obj, + resource_size_t size) { unsigned long flags = VM_NORESERVE; struct file *filp; @@ -455,31 +460,23 @@ static int create_shmem(struct drm_i915_private *i915, return 0; } -struct drm_i915_gem_object * -i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) +static struct drm_i915_gem_object * +create_shmem(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *i915 = mem->i915; struct drm_i915_gem_object *obj; struct address_space *mapping; unsigned int cache_level; gfp_t mask; int ret; - /* There is a prevalence of the assumption that we fit the object's - * page count inside a 32bit _signed_ variable. Let's document this and - * catch if we ever need to fix it. In the meantime, if you do spot - * such a local variable, please consider fixing! - */ - if (size >> PAGE_SHIFT > INT_MAX) - return ERR_PTR(-E2BIG); - - if (overflows_type(size, obj->base.size)) - return ERR_PTR(-E2BIG); - obj = i915_gem_object_alloc(); if (!obj) return ERR_PTR(-ENOMEM); - ret = create_shmem(i915, &obj->base, size); + ret = __create_shmem(i915, &obj->base, size); if (ret) goto fail; @@ -518,7 +515,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size) i915_gem_object_set_cache_coherency(obj, cache_level); - trace_i915_gem_object_create(obj); + i915_gem_object_init_memory_region(obj, mem, 0); return obj; @@ -527,14 +524,22 @@ fail: return ERR_PTR(ret); } +struct drm_i915_gem_object * +i915_gem_object_create_shmem(struct drm_i915_private *i915, + resource_size_t size) +{ + return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM], + size, 0); +} + /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv, - const void *data, size_t size) + const void *data, resource_size_t size) { struct drm_i915_gem_object *obj; struct file *file; - size_t offset; + resource_size_t offset; int err; obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE)); @@ -577,3 +582,35 @@ fail: i915_gem_object_put(obj); return ERR_PTR(err); } + +static int init_shmem(struct intel_memory_region *mem) +{ + int err; + + err = i915_gemfs_init(mem->i915); + if (err) { + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", + err); + } + + return 0; /* Don't error, we can simply fallback to the kernel mnt */ +} + +static void release_shmem(struct intel_memory_region *mem) +{ + i915_gemfs_fini(mem->i915); +} + +static const struct intel_memory_region_ops shmem_region_ops = { + .init = init_shmem, + .release = release_shmem, + .create_object = create_shmem, +}; + +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, 0, + totalram_pages() << PAGE_SHIFT, + PAGE_SIZE, 0, + 
&shmem_region_ops); +} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index edd21d14e64f..fd3ce6da8497 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -61,7 +27,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (!i915_gem_object_is_shrinkable(obj)) return false; - /* Only report true if by unbinding the object and putting its pages + /* + * Only report true if by unbinding the object and putting its pages * we can actually make forward progress towards freeing physical * pages. * @@ -72,16 +39,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count)) return false; - /* If any vma are "permanently" pinned, it will prevent us from - * reclaiming the obj->mm.pages. We only allow scanout objects to claim - * a permanent pin, along with a few others like the context objects. - * To simplify the scan, and to avoid walking the list of vma under the - * object, we just check the count of its permanently pinned. - */ - if (READ_ONCE(obj->pin_global)) - return false; - - /* We can only return physical pages to the system if we can either + /* + * We can only return physical pages to the system if we can either * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. */ @@ -162,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -275,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -346,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -373,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? 
freed : SHRINK_STOP; } @@ -391,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -426,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -446,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } @@ -497,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. - */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); mutex_release(&mutex->dep_map, 0, _RET_IP_); - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - fs_reclaim_release(GFP_KERNEL); if (unlock) @@ -523,46 +449,52 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + /* * We can only be called while the pages are pinned or when * the pages are released. If pinned, we should only be called * from a single caller under controlled conditions; and on release * only one caller may release us. Neither the two may cross. 
*/ - if (!list_empty(&obj->mm.link)) { /* pinned by caller */ - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(list_empty(&obj->mm.link)); + if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0)) + return; + spin_lock_irqsave(&i915->mm.obj_lock, flags); + if (!atomic_fetch_inc(&obj->mm.shrink_pin) && + !list_empty(&obj->mm.link)) { list_del_init(&obj->mm.link); i915->mm.shrink_count--; i915->mm.shrink_memory -= obj->base.size; - - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj, struct list_head *head) { + struct drm_i915_private *i915 = obj_to_i915(obj); + unsigned long flags; + GEM_BUG_ON(!i915_gem_object_has_pages(obj)); - GEM_BUG_ON(!list_empty(&obj->mm.link)); + if (!i915_gem_object_is_shrinkable(obj)) + return; - if (i915_gem_object_is_shrinkable(obj)) { - struct drm_i915_private *i915 = obj_to_i915(obj); - unsigned long flags; + if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1)) + return; - spin_lock_irqsave(&i915->mm.obj_lock, flags); - GEM_BUG_ON(!kref_read(&obj->base.refcount)); + spin_lock_irqsave(&i915->mm.obj_lock, flags); + GEM_BUG_ON(!kref_read(&obj->base.refcount)); + if (atomic_dec_and_test(&obj->mm.shrink_pin)) { + GEM_BUG_ON(!list_empty(&obj->mm.link)); list_add_tail(&obj->mm.link, head); i915->mm.shrink_count++; i915->mm.shrink_memory += obj->base.size; - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } + spin_unlock_irqrestore(&i915->mm.obj_lock, flags); } void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index aa533b4ab5f5..57cd8bc2657c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -10,6 +10,7 @@ #include <drm/drm_mm.h> #include <drm/i915_drm.h> +#include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gem_stolen.h" @@ -150,7 +151,7 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, return 0; } -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) +static void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv) { if (!drm_mm_initialized(&dev_priv->mm.stolen)) return; @@ -355,7 +356,7 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -int i915_gem_init_stolen(struct drm_i915_private *dev_priv) +static int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; @@ -425,8 +426,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) bdw_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; - case 11: default: + MISSING_CASE(INTEL_GEN(dev_priv)); + /* fall-through */ + case 11: + case 12: icl_get_stolen_reserved(dev_priv, &reserved_base, &reserved_size); break; @@ -536,6 +540,9 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); + + if (obj->mm.region) + i915_gem_object_release_memory_region(obj); } static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { @@ -545,15 +552,17 @@ static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { }; static struct drm_i915_gem_object * -_i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - struct drm_mm_node *stolen) 
+__i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + struct drm_mm_node *stolen, + struct intel_memory_region *mem) { struct drm_i915_gem_object *obj; unsigned int cache_level; + int err = -ENOMEM; obj = i915_gem_object_alloc(); - if (obj == NULL) - return NULL; + if (!obj) + goto err; drm_gem_private_object_init(&dev_priv->drm, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops); @@ -563,47 +572,95 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; i915_gem_object_set_cache_coherency(obj, cache_level); - if (i915_gem_object_pin_pages(obj)) + err = i915_gem_object_pin_pages(obj); + if (err) goto cleanup; + if (mem) + i915_gem_object_init_memory_region(obj, mem, 0); + return obj; cleanup: i915_gem_object_free(obj); - return NULL; +err: + return ERR_PTR(err); } -struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, - resource_size_t size) +static struct drm_i915_gem_object * +_i915_gem_object_create_stolen(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) { + struct drm_i915_private *dev_priv = mem->i915; struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; + return ERR_PTR(-ENODEV); if (size == 0) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); ret = i915_gem_stolen_insert_node(dev_priv, stolen, size, 4096); if (ret) { - kfree(stolen); - return NULL; + obj = ERR_PTR(ret); + goto err_free; } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj) - return obj; + obj = __i915_gem_object_create_stolen(dev_priv, stolen, mem); + if (IS_ERR(obj)) + goto err_remove; + return obj; + +err_remove: i915_gem_stolen_remove_node(dev_priv, stolen); +err_free: kfree(stolen); - return NULL; + return obj; +} + +struct drm_i915_gem_object * +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size) +{ + return i915_gem_object_create_region(dev_priv->mm.regions[INTEL_REGION_STOLEN], + size, I915_BO_ALLOC_CONTIGUOUS); +} + +static int init_stolen(struct intel_memory_region *mem) +{ + /* + * Initialise stolen early so that we may reserve preallocated + * objects for the BIOS to KMS transition. 
+ */ + return i915_gem_init_stolen(mem->i915); +} + +static void release_stolen(struct intel_memory_region *mem) +{ + i915_gem_cleanup_stolen(mem->i915); +} + +static const struct intel_memory_region_ops i915_region_stolen_ops = { + .init = init_stolen, + .release = release_stolen, + .create_object = _i915_gem_object_create_stolen, +}; + +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915) +{ + return intel_memory_region_create(i915, + intel_graphics_stolen_res.start, + resource_size(&intel_graphics_stolen_res), + PAGE_SIZE, 0, + &i915_region_stolen_ops); } struct drm_i915_gem_object * @@ -619,9 +676,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv int ret; if (!drm_mm_initialized(&dev_priv->mm.stolen)) - return NULL; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); + return ERR_PTR(-ENODEV); DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, &gtt_offset, &size); if (WARN_ON(size == 0) || WARN_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)) || WARN_ON(!IS_ALIGNED(stolen_offset, I915_GTT_MIN_ALIGNMENT))) - return NULL; + return ERR_PTR(-EINVAL); stolen = kzalloc(sizeof(*stolen), GFP_KERNEL); if (!stolen) - return NULL; + return ERR_PTR(-ENOMEM); stolen->start = stolen_offset; stolen->size = size; @@ -644,15 +699,15 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); - return NULL; + return ERR_PTR(ret); } - obj = _i915_gem_object_create_stolen(dev_priv, stolen); - if (obj == NULL) { + obj = __i915_gem_object_create_stolen(dev_priv, stolen, NULL); + if (IS_ERR(obj)) { DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); - return NULL; + return obj; } /* Some objects just need physical mem from stolen space */ @@ -674,22 +729,26 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. 
*/ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; - vma->flags |= I915_VMA_GLOBAL_BIND; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); @@ -701,5 +760,5 @@ err_pages: i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); - return NULL; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h index 2289644d8604..c1040627fbf3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h @@ -21,8 +21,7 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, u64 end); void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, struct drm_mm_node *node); -int i915_gem_init_stolen(struct drm_i915_private *dev_priv); -void i915_gem_cleanup_stolen(struct drm_i915_private *dev_priv); +struct intel_memory_region *i915_gem_stolen_setup(struct drm_i915_private *i915); struct drm_i915_gem_object * i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, resource_size_t size); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 1e372420771b..540ef0551789 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -50,10 +50,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - if (target) { + if (target && xchg(&target->file_priv, NULL)) list_del(&target->client_link); - target->file_priv = NULL; - } target = request; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index ca0c2f451742..1fa592d82af5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -181,22 +181,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. 
*/ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -313,10 +317,14 @@ int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_set_tiling *args = data; struct drm_i915_gem_object *obj; int err; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -340,9 +348,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_x; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x; else - args->swizzle_mode = to_i915(dev)->mm.bit_6_swizzle_y; + args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, @@ -364,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); @@ -402,6 +405,9 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int err = -ENOENT; + if (!dev_priv->ggtt.num_fences) + return -EOPNOTSUPP; + rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); if (obj) { @@ -415,10 +421,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data, switch (args->tiling_mode) { case I915_TILING_X: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y; break; default: case I915_TILING_NONE: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 11b231c187c5..4f970474013f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (ret == 0) ret = 
__i915_gem_object_put_pages(obj, I915_MM_SHRINKER); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } @@ -702,6 +677,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj) static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE | + I915_GEM_OBJECT_NO_GGTT | I915_GEM_OBJECT_ASYNC_CANCEL, .get_pages = i915_gem_userptr_get_pages, .put_pages = i915_gem_userptr_put_pages, @@ -782,7 +758,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - vm = dev_priv->kernel_context->vm; + vm = rcu_dereference_protected(dev_priv->kernel_context->vm, + true); /* static vm */ if (!vm || !vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 8de83c6d81f5..f27772f6779a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -8,6 +8,7 @@ #include "i915_selftest.h" +#include "gem/i915_gem_region.h" #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" @@ -17,6 +18,7 @@ #include "selftests/mock_drm.h" #include "selftests/mock_gem_device.h" +#include "selftests/mock_region.h" #include "selftests/i915_random.h" static const unsigned int page_sizes[] = { @@ -113,8 +115,6 @@ static int get_huge_pages(struct drm_i915_gem_object *obj) if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - obj->mm.madv = I915_MADV_DONTNEED; - GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); __i915_gem_object_set_pages(obj, st, sg_page_sizes); @@ -135,7 +135,6 @@ static void put_huge_pages(struct drm_i915_gem_object *obj, huge_pages_free_pages(pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops huge_page_ops = { @@ -168,6 +167,8 @@ huge_pages_object(struct drm_i915_private *i915, drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &huge_page_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -227,8 +228,6 @@ static int fake_get_huge_pages(struct drm_i915_gem_object *obj) i915_sg_trim(st); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -261,8 +260,6 @@ static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; sg_dma_address(sg) = page_size; - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; @@ -281,7 +278,6 @@ static void fake_put_huge_pages(struct drm_i915_gem_object *obj, { fake_free_huge_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -321,6 +317,8 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) else i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -333,7 +331,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = 
vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -447,6 +450,88 @@ out_device: return err; } +static int igt_mock_memory_region_huge_pages(void *arg) +{ + const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; + struct i915_ppgtt *ppgtt = arg; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long supported = INTEL_INFO(i915)->page_sizes; + struct intel_memory_region *mem; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int bit; + int err = 0; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("%s failed to create memory region\n", __func__); + return PTR_ERR(mem); + } + + for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + unsigned int page_size = BIT(bit); + resource_size_t phys; + int i; + + for (i = 0; i < ARRAY_SIZE(flags); ++i) { + obj = i915_gem_object_create_region(mem, page_size, + flags[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_region; + } + + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_close; + + err = igt_check_page_sizes(vma); + if (err) + goto out_unpin; + + phys = i915_gem_object_get_dma_address(obj, 0); + if (!IS_ALIGNED(phys, page_size)) { + pr_err("%s addr misaligned(%pa) page_size=%u\n", + __func__, &phys, page_size); + err = -EINVAL; + goto out_unpin; + } + + if (vma->page_sizes.gtt != page_size) { + pr_err("%s page_sizes.gtt=%u, expected=%u\n", + __func__, vma->page_sizes.gtt, + page_size); + err = -EINVAL; + goto out_unpin; + } + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + i915_gem_object_put(obj); + } + } + + goto out_region; + +out_unpin: + i915_vma_unpin(vma); +out_close: + i915_vma_close(vma); +out_put: + i915_gem_object_put(obj); +out_region: + intel_memory_region_put(mem); + return err; +} + static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_ppgtt *ppgtt = arg; @@ -879,9 +964,8 @@ out_object_put: return err; } -static int gpu_write(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_write(struct intel_context *ce, + struct i915_vma *vma, u32 dw, u32 val) { @@ -893,7 +977,7 @@ static int gpu_write(struct i915_vma *vma, if (err) return err; - return igt_gpu_fill_dw(vma, ctx, engine, dw * sizeof(u32), + return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), vma->size >> PAGE_SHIFT, val); } @@ -929,18 +1013,16 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) return err; } -static int __igt_write_huge(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int __igt_write_huge(struct intel_context *ce, struct drm_i915_gem_object *obj, u64 size, u64 offset, u32 dword, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -954,7 +1036,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, * The ggtt 
may have some pages reserved so * refrain from erroring out. */ - if (err == -ENOSPC && i915_is_ggtt(vm)) + if (err == -ENOSPC && i915_is_ggtt(ce->vm)) err = 0; goto out_vma_close; @@ -964,7 +1046,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, if (err) goto out_vma_unpin; - err = gpu_write(vma, ctx, engine, dword, val); + err = gpu_write(ce, vma, dword, val); if (err) { pr_err("gpu-write failed at offset=%llx\n", offset); goto out_vma_unpin; @@ -987,14 +1069,13 @@ out_vma_close: static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; - static struct intel_engine_cs *engines[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + struct intel_context *ce; I915_RND_STATE(prng); IGT_TIMEOUT(end_time); unsigned int max_page_size; - unsigned int id; + unsigned int count; u64 max; u64 num; u64 size; @@ -1008,19 +1089,18 @@ static int igt_write_huge(struct i915_gem_context *ctx, if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) size = round_up(size, I915_GTT_PAGE_SIZE_2M); - max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); - max = div_u64((vm->total - size), max_page_size); - n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) { - pr_info("store-dword-imm not supported on engine=%u\n", - id); + count = 0; + max = U64_MAX; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) continue; - } - engines[n++] = engine; - } + max = min(max, ce->vm->total); + n++; + } + i915_gem_context_unlock_engines(ctx); if (!n) return 0; @@ -1029,23 +1109,30 @@ static int igt_write_huge(struct i915_gem_context *ctx, * randomized order, lets also make feeding to the same engine a few * times in succession a possibility by enlarging the permutation array. */ - order = i915_random_order(n * I915_NUM_ENGINES, &prng); + order = i915_random_order(count * count, &prng); if (!order) return -ENOMEM; + max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); + max = div_u64(max - size, max_page_size); + /* * Try various offsets in an ascending/descending fashion until we * timeout -- we want to avoid issues hidden by effectively always using * offset = 0. 
*/ i = 0; + engines = i915_gem_context_lock_engines(ctx); for_each_prime_number_from(num, 0, max) { u64 offset_low = num * max_page_size; u64 offset_high = (max - num) * max_page_size; u32 dword = offset_in_page(num) / 4; + struct intel_context *ce; - engine = engines[order[i] % n]; - i = (i + 1) % (n * I915_NUM_ENGINES); + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; /* * In order to utilize 64K pages we need to both pad the vma @@ -1057,22 +1144,23 @@ static int igt_write_huge(struct i915_gem_context *ctx, offset_low = round_down(offset_low, I915_GTT_PAGE_SIZE_2M); - err = __igt_write_huge(ctx, engine, obj, size, offset_low, + err = __igt_write_huge(ce, obj, size, offset_low, dword, num + 1); if (err) break; - err = __igt_write_huge(ctx, engine, obj, size, offset_high, + err = __igt_write_huge(ce, obj, size, offset_high, dword, num + 1); if (err) break; if (igt_timeout(end_time, - "%s timed out on engine=%u, offset_low=%llx offset_high=%llx, max_page_size=%x\n", - __func__, engine->id, offset_low, offset_high, + "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", + __func__, ce->engine->name, offset_low, offset_high, max_page_size)) break; } + i915_gem_context_unlock_engines(ctx); kfree(order); @@ -1314,15 +1402,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; - struct intel_engine_cs *engine; - enum intel_engine_id id; unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE -- in the @@ -1332,9 +1420,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. 
*/ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1387,7 +1476,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; @@ -1419,14 +1508,18 @@ static int igt_ppgtt_pin_update(void *arg) */ n = 0; - for_each_engine(engine, dev_priv, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } + i915_gem_context_unlock_engines(ctx); + if (err) + goto out_unpin; + while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) @@ -1439,6 +1532,8 @@ out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1448,7 +1543,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1498,6 +1593,7 @@ out_put: out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1505,14 +1601,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct i915_gem_engines_iter it; + struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; - int err; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1521,12 +1617,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1548,16 +1646,19 @@ static int igt_shrink_thp(void *arg) goto out_unpin; n = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; - err = gpu_write(vma, ctx, engine, n++, 0xdeadbeaf); + err = gpu_write(ce, vma, n++, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - + i915_gem_context_unlock_engines(ctx); i915_vma_unpin(vma); + if (err) + goto out_close; /* * Now that the pages are *unpinned* shrink-all should invoke @@ -1583,16 +1684,17 @@ static int igt_shrink_thp(void *arg) while (n--) { err = cpu_check(obj, n, 0xdeadbeaf); if (err) - goto out_unpin; + break; } - out_unpin: i915_vma_unpin(vma); out_close: i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1601,6 +1703,7 @@ int i915_gem_huge_page_mock_selftests(void) { 
static const struct i915_subtest tests[] = { SUBTEST(igt_mock_exhaust_device_supported_pages), + SUBTEST(igt_mock_memory_region_huge_pages), SUBTEST(igt_mock_ppgtt_misaligned_dma), SUBTEST(igt_mock_ppgtt_huge_fill), SUBTEST(igt_mock_ppgtt_64K), @@ -1617,7 +1720,6 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); @@ -1643,9 +1745,7 @@ out_close: i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } @@ -1661,7 +1761,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) }; struct drm_file *file; struct i915_gem_context *ctx; - intel_wakeref_t wakeref; + struct i915_address_space *vm; int err; if (!HAS_PPGTT(i915)) { @@ -1676,25 +1776,21 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); - return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0ff7a89aadca..549810f70aeb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int gtt_get(struct drm_i915_gem_object *obj, @@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int wc_set(struct drm_i915_gem_object *obj, @@ -280,7 +293,6 @@ 
static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto unlock; + goto free; } i915_random_reorder(offsets, ncachelines, &prng); @@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg) } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +free: kfree(offsets); return err; put_object: i915_gem_object_put(obj); - goto unlock; + goto free; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 3e6f4a65d356..e5c235051ae5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -52,19 +53,17 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } @@ -78,7 +77,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } i915_request_add(rq); } @@ -86,7 +85,7 @@ static int live_nop_switch(void *arg) pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); err = -EIO; - goto out_unlock; + goto out_file; } times[1] = ktime_get_raw(); @@ -96,7 +95,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -106,7 +105,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } /* @@ -142,7 +141,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -150,8 +149,212 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); +out_file: + mock_file_free(i915, file); + return err; +} + +struct parallel_switch { + struct task_struct *tsk; + struct intel_context *ce[2]; +}; + +static int __live_parallel_switch1(void *data) +{ + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq = NULL; + int err, n; + + for (n = 0; n < 
ARRAY_SIZE(arg->ce); n++) { + i915_request_put(rq); + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + } + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int __live_parallel_switchN(void *data) +{ + struct parallel_switch *arg = data; + IGT_TIMEOUT(end_time); + unsigned long count; + int n; + + count = 0; + do { + for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { + struct i915_request *rq; + + rq = i915_request_create(arg->ce[n]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + } + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); + return 0; +} + +static int live_parallel_switch(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_switch1, + __live_parallel_switchN, + NULL, + }; + struct parallel_switch *data = NULL; + struct i915_gem_engines *engines; + struct i915_gem_engines_iter it; + int (* const *fn)(void *arg); + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + int n, m, count; + int err = 0; + + /* + * Check we can process switches on all engines simultaneously. + */ + + if (!DRIVER_CAPS(i915)->has_logical_contexts) + return 0; + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_file; + } + + engines = i915_gem_context_lock_engines(ctx); + count = engines->num_engines; + + data = kcalloc(count, sizeof(*data), GFP_KERNEL); + if (!data) { + i915_gem_context_unlock_engines(ctx); + err = -ENOMEM; + goto out_file; + } + + m = 0; /* Use the first context as our template for the engines */ + for_each_gem_engine(ce, engines, it) { + err = intel_context_pin(ce); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out; + } + data[m++].ce[0] = intel_context_get(ce); + } + i915_gem_context_unlock_engines(ctx); + + /* Clone the same set of engines into the other contexts */ + for (n = 1; n < ARRAY_SIZE(data->ce); n++) { + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + + for (m = 0; m < count; m++) { + if (!data[m].ce[0]) + continue; + + ce = intel_context_create(ctx, data[m].ce[0]->engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + data[m].ce[n] = ce; + } + } + + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + int n; + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; + + for (n = 0; n < count; n++) { + if (!data[n].ce[0]) + continue; + + data[n].tsk = kthread_run(*fn, &data[n], + "igt/parallel:%s", + data[n].ce[0]->engine->name); + if (IS_ERR(data[n].tsk)) { + err = PTR_ERR(data[n].tsk); + break; + } + get_task_struct(data[n].tsk); + } + + for (n = 0; n < count; n++) { + int status; + + if (IS_ERR_OR_NULL(data[n].tsk)) + continue; + + status = kthread_stop(data[n].tsk); + if (status && !err) + err = status; + + put_task_struct(data[n].tsk); + data[n].tsk = NULL; + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + +out: + for (n = 0; n < count; n++) { + for (m = 0; m < ARRAY_SIZE(data->ce); m++) { + if 
(!data[n].ce[m]) + continue; + + intel_context_unpin(data[n].ce[m]); + intel_context_put(data[n].ce[m]); + } + } + kfree(data); +out_file: mock_file_free(i915, file); return err; } @@ -166,28 +369,20 @@ static unsigned long fake_page_count(struct drm_i915_gem_object *obj) return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; } -static int gpu_fill(struct drm_i915_gem_object *obj, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int gpu_fill(struct intel_context *ce, + struct drm_i915_gem_object *obj, unsigned int dw) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_vma *vma; int err; - GEM_BUG_ON(obj->base.size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + GEM_BUG_ON(obj->base.size > ce->vm->total); + GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); - vma = i915_vma_instance(obj, vm, NULL); + vma = i915_vma_instance(obj, ce->vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); - i915_gem_object_lock(obj); - err = i915_gem_object_set_to_gtt_domain(obj, true); - i915_gem_object_unlock(obj); - if (err) - return err; - err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); if (err) return err; @@ -200,9 +395,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, * whilst checking that each context provides a unique view * into the object. */ - err = igt_gpu_fill_dw(vma, - ctx, - engine, + err = igt_gpu_fill_dw(ce, vma, (dw * real_page_count(obj)) << PAGE_SHIFT | (dw * sizeof(u32)), real_page_count(obj), @@ -305,22 +498,21 @@ static int file_add_object(struct drm_file *file, } static struct drm_i915_gem_object * -create_test_object(struct i915_gem_context *ctx, +create_test_object(struct i915_address_space *vm, struct drm_file *file, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm; u64 size; int err; /* Keep in GEM's good graces */ - i915_retire_requests(ctx->i915); + intel_gt_retire_requests(vm->gt); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); - obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); if (IS_ERR(obj)) return obj; @@ -348,6 +540,45 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) return npages / DW_PER_PAGE; } +static void throttle_release(struct i915_request **q, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (IS_ERR_OR_NULL(q[i])) + continue; + + i915_request_put(fetch_and_zero(&q[i])); + } +} + +static int throttle(struct intel_context *ce, + struct i915_request **q, int count) +{ + int i; + + if (!IS_ERR_OR_NULL(q[0])) { + if (i915_request_wait(q[0], + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) + return -EINTR; + + i915_request_put(q[0]); + } + + for (i = 0; i < count - 1; i++) + q[i] = q[i + 1]; + + q[i] = intel_context_create_request(ce); + if (IS_ERR(q[i])) + return PTR_ERR(q[i]); + + i915_request_get(q[i]); + i915_request_add(q[i]); + + return 0; +} + static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; @@ -367,6 +598,7 @@ static int igt_ctx_exec(void *arg) for_each_engine(engine, i915, id) { struct drm_i915_gem_object *obj = NULL; unsigned long ncontexts, ndwords, dw; + struct i915_request *tq[5] = {}; struct igt_live_test t; struct drm_file *file; IGT_TIMEOUT(end_time); @@ -382,39 +614,53 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - 
err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; - ctx = live_context(i915, file); + ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); + if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); + kernel_context_close(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -424,6 +670,9 @@ static int igt_ctx_exec(void *arg) ndwords++; ncontexts++; + + intel_context_put(ce); + kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -441,10 +690,10 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); if (err) @@ -459,6 +708,7 @@ out_unlock: static int igt_shared_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_request *tq[5] = {}; struct i915_gem_context *parent; struct intel_engine_cs *engine; enum intel_engine_id id; @@ -478,22 +728,20 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; for_each_engine(engine, i915, id) { unsigned long ncontexts, ndwords, dw; @@ -509,6 +757,7 @@ static int igt_shared_ctx_exec(void *arg) ncontexts = 0; while (!time_after(jiffies, end_time)) { struct i915_gem_context *ctx; + struct intel_context *ce; ctx = kernel_context(i915); if (IS_ERR(ctx)) { @@ -516,23 +765,38 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, parent->vm); + mutex_unlock(&ctx->mutex); + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(parent, file, &objects); + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? 
%s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + intel_context_put(ce); + kernel_context_close(ctx); + goto out_test; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + intel_context_put(ce); kernel_context_close(ctx); goto out_test; } @@ -545,6 +809,7 @@ static int igt_shared_ctx_exec(void *arg) ndwords++; ncontexts++; + intel_context_put(ce); kernel_context_close(ctx); } pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", @@ -562,16 +827,13 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); } out_test: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); return err; } @@ -604,6 +866,8 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma) __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -681,10 +945,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -698,8 +959,7 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -860,8 +1120,8 @@ out: igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(ce->engine->i915, - 0, MAX_SCHEDULE_TIMEOUT); + ret = intel_gt_wait_for_idle(ce->engine->gt, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; @@ -887,7 +1147,7 @@ __sseu_test(const char *name, if (ret) return ret; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; @@ -945,8 +1205,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); @@ -991,7 +1249,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_fail; out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) ret = -EIO; intel_context_unpin(ce); @@ -1001,8 +1259,6 @@ out_put: i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); @@ -1041,6 +1297,7 @@ static int igt_ctx_readonly(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj = NULL; + struct i915_request *tq[5] = {}; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx, ndwords, dw; @@ -1061,52 +1318,63 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + rcu_read_lock(); + vm = 
rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { + rcu_read_unlock(); err = 0; - goto out_unlock; + goto out_file; } + rcu_read_unlock(); ndwords = 0; dw = 0; while (!time_after(jiffies, end_time)) { - struct intel_engine_cs *engine; - unsigned int id; + struct i915_gem_engines_iter it; + struct intel_context *ce; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_can_store_dword(ce->engine)) continue; if (!obj) { - obj = create_test_object(ctx, file, &objects); + obj = create_test_object(ce->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto out_unlock; + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (prandom_u32_state(&prng) & 1) i915_gem_object_set_readonly(obj); } - err = gpu_fill(obj, ctx, engine, dw); + err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); - goto out_unlock; + ce->engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); + i915_gem_context_unlock_engines(ctx); + goto out_file; + } + + err = throttle(ce, tq, ARRAY_SIZE(tq)); + if (err) { + i915_gem_context_unlock_engines(ctx); + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1115,6 +1383,7 @@ static int igt_ctx_readonly(void *arg) } ndwords++; } + i915_gem_context_unlock_engines(ctx); } pr_info("Submitted %lu dwords (across %u engines)\n", ndwords, RUNTIME_INFO(i915)->num_engines); @@ -1137,19 +1406,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: + throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1167,6 +1436,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1197,17 +1467,20 @@ static int write_to_scratch(struct i915_gem_context *ctx, __i915_gem_object_flush_map(obj, 0, 64); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1229,12 +1502,11 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&vma, 0); i915_request_add(rq); + i915_vm_put(vm); return 0; skip_request: @@ -1243,6 +1515,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: 
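[Editor's note, not part of the patch] write_to_scratch() and read_from_scratch() above now take their own reference on the context's address space with i915_gem_context_get_vm_rcu() and drop it with i915_vm_put() on every exit path, which is why the error ladder gains the err_vm label. A sketch of that acquire-then-unwind shape with illustrative names (resource_get/resource_put, step_a/step_b and undo_a are assumptions, not driver API):

    struct resource;                              /* opaque, illustrative only */
    struct resource *resource_get(void);          /* assumed: take a reference */
    void resource_put(struct resource *r);        /* assumed: drop the reference */
    int step_a(struct resource *r);               /* assumed: may fail */
    int step_b(struct resource *r);               /* assumed: may fail */
    void undo_a(struct resource *r);              /* assumed: reverse step_a */

    static int do_work(void)
    {
            struct resource *r;
            int err;

            r = resource_get();                   /* held for the whole function */
            if (!r)
                    return -1;

            err = step_a(r);
            if (err)
                    goto err_res;                 /* only the reference to undo */

            err = step_b(r);
            if (err)
                    goto err_a;                   /* unwind in reverse order of setup */

            resource_put(r);
            return 0;

    err_a:
            undo_a(r);
    err_res:
            resource_put(r);
            return err;
    }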
i915_gem_object_put(obj); return err; @@ -1254,6 +1528,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */ const u32 result = 0x100; struct i915_request *rq; @@ -1296,17 +1571,20 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + intel_gt_chipset_flush(engine->gt); + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1337,12 +1615,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto err_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto err_vm; } *value = cmd[result / sizeof(*cmd)]; @@ -1357,6 +1635,8 @@ err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1387,27 +1667,25 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are distinct */ if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + goto out_file; vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); @@ -1436,7 +1714,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_unlock; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1445,7 +1723,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_unlock; + goto out_file; } this++; @@ -1455,30 +1733,13 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); return err; } -static __maybe_unused const char * -__engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines) -{ - struct intel_engine_cs *engine; - intel_engine_mask_t tmp; - - if (engines == ALL_ENGINES) - return "all"; - - for_each_engine_masked(engine, i915, engines, tmp) - return engine->name; - - return "none"; -} - static bool skip_unused_engines(struct intel_context *ce, void *data) { return !ce->state; @@ -1506,13 +1767,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. 
*/ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1585,8 +1842,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x @@ -1614,6 +1869,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_switch), + SUBTEST(live_parallel_switch), SUBTEST(igt_ctx_exec), SUBTEST(igt_ctx_readonly), SUBTEST(igt_ctx_sseu), diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1d27babff0ce..65d4dbf91999 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -10,6 +10,7 @@ #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" +#include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" struct tile { @@ -76,18 +77,103 @@ static u64 tiled_offset(const struct tile *tile, u64 v) static int check_partial_mapping(struct drm_i915_gem_object *obj, const struct tile *tile, - unsigned long end_time) + struct rnd_state *prng) { - const unsigned int nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_ggtt_view view; struct i915_vma *vma; unsigned long page; + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; int err; - if (igt_timeout(end_time, - "%s: timed out before tiling=%d stride=%d\n", - __func__, tile->tiling, tile->stride)) - return -EINTR; + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) { + pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", + tile->tiling, tile->stride, err); + return err; + } + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + i915_gem_object_lock(obj); + err = i915_gem_object_set_to_gtt_domain(obj, true); + i915_gem_object_unlock(obj); + if (err) { + pr_err("Failed to flush to GTT write domain; err=%d\n", err); + return err; + } + + page = i915_prandom_u32_max_state(npages, prng); + view = compute_partial_view(obj, page, MIN_CHUNK_PAGES); + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(vma)); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", + page, (int)PTR_ERR(io)); + err = PTR_ERR(io); + goto out; + } + + iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + goto out; + + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + 
vma->size >> PAGE_SHIFT, + tile->tiling ? tile_row_pages(obj) : 0, + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + +out: + i915_vma_destroy(vma); + return err; +} + +static int check_partial_mappings(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); if (err) { @@ -170,11 +256,42 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, return err; i915_vma_destroy(vma); + + if (igt_timeout(end_time, + "%s: timed out after tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; } return 0; } +static unsigned int +setup_tile_size(struct tile *tile, struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) <= 2) { + tile->height = 16; + tile->width = 128; + tile->size = 11; + } else if (tile->tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile->height = 32; + tile->width = 128; + tile->size = 12; + } else { + tile->height = 8; + tile->width = 512; + tile->size = 12; + } + + if (INTEL_GEN(i915) < 4) + return 8192 / tile->width; + else if (INTEL_GEN(i915) < 7) + return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width; + else + return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width; +} + static int igt_partial_tiling(void *arg) { const unsigned int nreal = 1 << 12; /* largest tile row x2 */ @@ -205,7 +322,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -219,7 +335,7 @@ static int igt_partial_tiling(void *arg) tile.swizzle = I915_BIT_6_SWIZZLE_NONE; tile.tiling = I915_TILING_NONE; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err && err != -EINTR) goto out_unlock; } @@ -241,10 +357,10 @@ static int igt_partial_tiling(void *arg) tile.tiling = tiling; switch (tiling) { case I915_TILING_X: - tile.swizzle = i915->mm.bit_6_swizzle_x; + tile.swizzle = i915->ggtt.bit_6_swizzle_x; break; case I915_TILING_Y: - tile.swizzle = i915->mm.bit_6_swizzle_y; + tile.swizzle = i915->ggtt.bit_6_swizzle_y; break; } @@ -253,31 +369,11 @@ static int igt_partial_tiling(void *arg) tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) continue; - if (INTEL_GEN(i915) <= 2) { - tile.height = 16; - tile.width = 128; - tile.size = 11; - } else if (tile.tiling == I915_TILING_Y && - HAS_128_BYTE_Y_TILING(i915)) { - tile.height = 32; - tile.width = 128; - tile.size = 12; - } else { - tile.height = 8; - tile.width = 512; - tile.size = 12; - } - - if (INTEL_GEN(i915) < 4) - max_pitch = 8192 / tile.width; - else if (INTEL_GEN(i915) < 7) - max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; - else - max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + max_pitch = setup_tile_size(&tile, i915); for (pitch = max_pitch; pitch; pitch >>= 1) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -285,7 +381,7 @@ static int igt_partial_tiling(void *arg) if (pitch > 2 && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch - 1); - 
err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -294,7 +390,7 @@ static int igt_partial_tiling(void *arg) if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { tile.stride = tile.width * (pitch + 1); - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -305,7 +401,7 @@ static int igt_partial_tiling(void *arg) if (INTEL_GEN(i915) >= 4) { for_each_prime_number(pitch, max_pitch) { tile.stride = tile.width * pitch; - err = check_partial_mapping(obj, &tile, end); + err = check_partial_mappings(obj, &tile, end); if (err == -EINTR) goto next_tiling; if (err) @@ -318,7 +414,97 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_smoke_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + intel_wakeref_t wakeref; + I915_RND_STATE(prng); + unsigned long count; + IGT_TIMEOUT(end); + int err; + + /* + * igt_partial_tiling() does an exhastive check of partial tiling + * chunking, but will undoubtably run out of time. Here, we do a + * randomised search and hope over many runs of 1s with different + * seeds we will do a thorough check. + * + * Remember to look at the st_seed if we see a flip-flop in BAT! + */ + + if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) + return 0; + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + do { + struct tile tile; + + tile.tiling = + i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng); + switch (tile.tiling) { + case I915_TILING_NONE: + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + break; + + case I915_TILING_X: + tile.swizzle = i915->ggtt.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->ggtt.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (tile.tiling != I915_TILING_NONE) { + unsigned int max_pitch = setup_tile_size(&tile, i915); + + tile.stride = + i915_prandom_u32_max_state(max_pitch, &prng); + tile.stride = (1 + tile.stride) * tile.width; + if (INTEL_GEN(i915) < 4) + tile.stride = rounddown_pow_of_two(tile.stride); + } + + err = check_partial_mapping(obj, &tile, &prng); + if (err) + break; + + count++; + } while (!__igt_timeout(end, NULL)); + + pr_info("%s: Completed %lu trials\n", __func__, count); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); @@ -386,21 +572,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); + 
cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) { + igt_flush_test(i915); intel_gt_pm_put(&i915->gt); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_driver_register__shrinker(i915); } @@ -490,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; @@ -515,6 +692,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_partial_tiling), + SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index c21d747e7d05..9ec55b3a3815 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg) if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) dst->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_copy_blt(src, dst, ce); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 57ece53c1075..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" @@ -84,6 +85,8 @@ igt_emit_store_dw(struct i915_vma *vma, *cmd = MI_BATCH_BUFFER_END; i915_gem_object_unpin_map(obj); + intel_gt_chipset_flush(vma->vm->gt); + vma = i915_vma_instance(obj, vma->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -101,40 +104,35 @@ err: return ERR_PTR(err); } -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val) +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val) { - struct i915_address_space *vm = ctx->vm ?: &engine->gt->ggtt->vm; struct i915_request *rq; struct i915_vma *batch; unsigned int flags; int err; - GEM_BUG_ON(vma->size > vm->total); - GEM_BUG_ON(!intel_engine_can_store_dword(engine)); + 
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); GEM_BUG_ON(!i915_vma_is_pinned(vma)); batch = igt_emit_store_dw(vma, offset, count, val); if (IS_ERR(batch)) return PTR_ERR(batch); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_batch; } flags = 0; - if (INTEL_GEN(vm->i915) <= 5) + if (INTEL_GEN(ce->vm->i915) <= 5) flags |= I915_DISPATCH_SECURE; - err = engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); if (err) goto err_request; @@ -156,9 +154,7 @@ int igt_gpu_fill_dw(struct i915_vma *vma, i915_request_add(rq); - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return 0; @@ -167,7 +163,6 @@ skip_request: err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h index 361a7ef866b0..4221cf84d175 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h @@ -11,9 +11,11 @@ struct i915_request; struct i915_gem_context; -struct intel_engine_cs; struct i915_vma; +struct intel_context; +struct intel_engine_cs; + struct i915_request * igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine); @@ -23,11 +25,8 @@ igt_emit_store_dw(struct i915_vma *vma, unsigned long count, u32 val); -int igt_gpu_fill_dw(struct i915_vma *vma, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u64 offset, - unsigned long count, - u32 val); +int igt_gpu_fill_dw(struct intel_context *ce, + struct i915_vma *vma, u64 offset, + unsigned long count, u32 val); #endif /* __IGT_GEM_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..74ddd682c9cd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; @@ -48,14 +42,15 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: kfree(ctx); return NULL; @@ -73,7 +68,7 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * @@ -82,8 +77,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) struct i915_gem_context *ctx; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; diff --git 
a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 09c68dda2098..55317081d48b 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -120,7 +120,6 @@ __dma_fence_signal__notify(struct dma_fence *fence, struct dma_fence_cb *cur, *tmp; lockdep_assert_held(fence->lock); - lockdep_assert_irqs_disabled(); list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); @@ -134,9 +133,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; + unsigned long flags; LIST_HEAD(signal); - spin_lock(&b->irq_lock); + spin_lock_irqsave(&b->irq_lock, flags); if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); @@ -182,30 +182,23 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) } } - spin_unlock(&b->irq_lock); + spin_unlock_irqrestore(&b->irq_lock, flags); list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); struct list_head cb_list; - spin_lock(&rq->lock); + spin_lock_irqsave(&rq->lock, flags); list_replace(&rq->fence.cb_list, &cb_list); __dma_fence_signal__timestamp(&rq->fence, timestamp); __dma_fence_signal__notify(&rq->fence, &cb_list); - spin_unlock(&rq->lock); + spin_unlock_irqrestore(&rq->lock, flags); i915_request_put(rq); } } -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) -{ - local_irq_disable(); - intel_engine_breadcrumbs_irq(engine); - local_irq_enable(); -} - static void signal_irq_work(struct irq_work *work) { struct intel_engine_cs *engine = @@ -275,7 +268,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) bool i915_request_enable_breadcrumb(struct i915_request *rq) { lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) { struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; @@ -325,7 +317,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq) struct intel_breadcrumbs *b = &rq->engine->breadcrumbs; lockdep_assert_held(&rq->lock); - lockdep_assert_irqs_disabled(); /* * We must wait for b->irq_lock so that we know the interrupt handler diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index f55691d151ae..59c3083c1ec1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -62,7 +62,7 @@ int __intel_context_do_pin(struct intel_context *ce) } err = 0; - with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref) + with_intel_runtime_pm(ce->engine->uncore->rpm, wakeref) err = ce->ops->pin(ce); if (err) goto err; @@ -134,10 +134,11 @@ static int __context_pin_state(struct i915_vma *vma) static void __context_unpin_state(struct i915_vma *vma) { - __i915_vma_unpin(vma); i915_vma_make_shrinkable(vma); + __i915_vma_unpin(vma); } +__i915_active_call static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); @@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active) intel_timeline_unpin(ce->timeline); intel_ring_unpin(ce->ring); + intel_context_put(ce); } @@ -219,12 +221,20 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; + GEM_BUG_ON(!engine->cops); 
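[Editor's note, not part of the patch] The intel_breadcrumbs.c hunk above replaces plain spin_lock()/spin_unlock() with the irqsave variants and drops the lockdep_assert_irqs_disabled() checks: once the signaling path can be reached from process context as well as from the irq_work, it can no longer assume interrupts are already disabled, so each critical section must save and restore the interrupt state itself. A minimal sketch of that locking pattern, using a hypothetical my_lock/my_list pair rather than the driver's real structures:

    #include <linux/spinlock.h>
    #include <linux/list.h>

    static DEFINE_SPINLOCK(my_lock);
    static LIST_HEAD(my_list);

    /* Callable from both process context and hard-irq context. */
    static void drain_my_list(struct list_head *out)
    {
            unsigned long flags;

            spin_lock_irqsave(&my_lock, flags);      /* disables irqs only if needed */
            list_splice_init(&my_list, out);         /* move everything off the list */
            spin_unlock_irqrestore(&my_lock, flags); /* restores the previous irq state */
    }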
kref_init(&ce->ref); ce->gem_context = ctx; - ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm); + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (vm) + ce->vm = i915_vm_get(vm); + else + ce->vm = i915_vm_get(&engine->gt->ggtt->vm); + rcu_read_unlock(); if (ctx->timeline) ce->timeline = intel_timeline_get(ctx->timeline); @@ -238,7 +248,7 @@ intel_context_init(struct intel_context *ce, mutex_init(&ce->pin_mutex); - i915_active_init(ctx->i915, &ce->active, + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire); } @@ -298,14 +308,14 @@ int intel_context_prepare_remote_request(struct intel_context *ce, /* Only suitable for use in remotely modifying this context */ GEM_BUG_ON(rq->hw_context == ce); - if (rq->timeline != tl) { /* beware timeline sharing */ + if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ err = mutex_lock_interruptible_nested(&tl->mutex, SINGLE_DEPTH_NESTING); if (err) return err; /* Queue this switch after current activity by this context. */ - err = i915_active_request_set(&tl->last_request, rq); + err = i915_active_fence_set(&tl->last_request, rq); mutex_unlock(&tl->mutex); if (err) return err; @@ -319,7 +329,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, * words transfer the pinned ce object to tracked active request. */ GEM_BUG_ON(i915_active_is_idle(&ce->active)); - return i915_active_ref(&ce->active, rq->timeline, rq); + return i915_active_add_request(&ce->active, rq); } struct i915_request *intel_context_create_request(struct intel_context *ce) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index bf9cedfccbf0..6959b05ae5f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -58,6 +58,7 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + u32 tag; /* cookie passed to HW to track this context on submission */ unsigned int active_count; /* protected by timeline->mutex */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d3c6993f4f46..93ea367fe624 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists) return READ_ONCE(*execlists->active); } +static inline void +execlists_active_lock_bh(struct intel_engine_execlists *execlists) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&execlists->tasklet); +} + +static inline void +execlists_active_unlock_bh(struct intel_engine_execlists *execlists) +{ + tasklet_unlock(&execlists->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ +} + struct i915_request * execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); @@ -335,7 +349,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); static inline void @@ -408,8 +421,9 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, engine->serial++; /* contexts lost */ } -bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct intel_gt *gt); +bool intel_engine_is_idle(struct intel_engine_cs *engine); +void intel_engine_flush_submission(struct intel_engine_cs *engine); void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 82630db0394b..051734c9b733 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -277,6 +277,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); + if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) + return -EINVAL; + if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) return -EINVAL; @@ -293,6 +296,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); engine->id = id; + engine->legacy_idx = INVALID_ENGINE; engine->mask = BIT(id); engine->i915 = gt->i915; engine->gt = gt; @@ -328,6 +332,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) intel_engine_sanitize_mmio(engine); gt->engine_class[info->class][info->instance] = engine; + gt->engine[id] = engine; intel_engine_add_user(engine); gt->i915->engine[id] = engine; @@ -680,6 +685,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) engine->status_page.vma)) goto out_frame; + mutex_lock(&frame->timeline.mutex); + frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; @@ -688,18 +695,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.i915 = engine->i915; frame->rq.engine = engine; frame->rq.ring = &frame->ring; - frame->rq.timeline = &frame->timeline; + rcu_assign_pointer(frame->rq.timeline, &frame->timeline); dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; + spin_lock_irq(&engine->active.lock); dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ intel_timeline_unpin(&frame->timeline); out_timeline: + mutex_unlock(&frame->timeline.mutex); intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); @@ -730,6 +741,7 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { + static struct lock_class_key kernel; struct intel_context *ce; int err; @@ -745,6 +757,14 @@ create_kernel_context(struct intel_engine_cs *engine) return ERR_PTR(err); } + /* + * Give our perma-pinned kernel timelines a separate lockdep class, + * so that we can 
use them from within the normal user timelines + * should we need to inject GPU operations during their request + * construction. + */ + lockdep_set_class(&ce->timeline->mutex, &kernel); + return ce; } @@ -814,8 +834,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); + if (engine->kernel_context) { + intel_context_unpin(engine->kernel_context); + intel_context_put(engine->kernel_context); + } GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); intel_wa_list_free(&engine->ctx_wa_list); @@ -948,6 +970,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -965,7 +988,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); - for_each_instdone_slice_subslice(i915, slice, subslice) { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { instdone->sampler[slice][subslice] = read_subslice_reg(engine, slice, subslice, GEN7_SAMPLER_INSTDONE); @@ -1031,6 +1054,25 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } +void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + struct tasklet_struct *t = &engine->execlists.tasklet; + + if (__tasklet_is_scheduled(t)) { + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); + } + + /* Otherwise flush the tasklet if it was running on another cpu */ + tasklet_unlock_wait(t); +} + /** * intel_engine_is_idle() - Report if the engine has finished process all work * @engine: the intel_engine_cs @@ -1049,21 +1091,9 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Waiting to drain ELSP? */ if (execlists_active(&engine->execlists)) { - struct tasklet_struct *t = &engine->execlists.tasklet; - synchronize_hardirq(engine->i915->drm.pdev->irq); - local_bh_disable(); - if (tasklet_trylock(t)) { - /* Must wait for any GPU reset in progress. */ - if (__tasklet_is_enabled(t)) - t->func(t->data); - tasklet_unlock(t); - } - local_bh_enable(); - - /* Otherwise flush the tasklet if it was on another cpu */ - tasklet_unlock_wait(t); + intel_engine_flush_submission(engine); if (execlists_active(&engine->execlists)) return false; @@ -1093,7 +1123,7 @@ bool intel_engines_are_idle(struct intel_gt *gt) if (!READ_ONCE(gt->awake)) return true; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_is_idle(engine)) return false; } @@ -1106,7 +1136,7 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->set_default_submission(engine); } @@ -1118,6 +1148,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) case 3: /* maybe only uses physical not virtual addresses */ return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); + case 4: + return !IS_I965G(engine->i915); /* who knows! 
*/ case 6: return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ default: @@ -1193,13 +1225,43 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) } } +static struct intel_timeline *get_timeline(struct i915_request *rq) +{ + struct intel_timeline *tl; + + /* + * Even though we are holding the engine->active.lock here, there + * is no control over the submission queue per-se and we are + * inspecting the active state at a random point in time, with an + * unknown queue. Play safe and make sure the timeline remains valid. + * (Only being used for pretty printing, one extra kref shouldn't + * cause a camel stampede!) + */ + rcu_read_lock(); + tl = rcu_dereference(rq->timeline); + if (!kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + + return tl; +} + +static const char *repr_timer(const struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return "inactive"; + + if (timer_pending(t)) + return "active"; + + return "expired"; +} + static void intel_engine_print_registers(struct intel_engine_cs *engine, struct drm_printer *m) { struct drm_i915_private *dev_priv = engine->i915; - const struct intel_engine_execlists * const execlists = - &engine->execlists; - unsigned long flags; + struct intel_engine_execlists * const execlists = &engine->execlists; u64 addr; if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) @@ -1256,19 +1318,20 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, unsigned int idx; u8 read, write; - drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n", - ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), - ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), - num_entries); + drm_printf(m, "\tExeclist tasklet queued? %s (%s), timeslice? %s\n", + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + repr_timer(&engine->execlists.timer)); read = execlists->csb_head; write = READ_ONCE(*execlists->csb_write); - drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n", - read, write, - yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); + drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), + read, write, num_entries); + if (read >= num_entries) read = 0; if (write >= num_entries) @@ -1281,35 +1344,45 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - spin_lock_irqsave(&engine->active.lock, flags); + execlists_active_lock_bh(execlists); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; len = snprintf(hdr, sizeof(hdr), - "\t\tActive[%d: ", + "\t\tActive[%d]: ", (int)(port - execlists->active)); - if (!i915_request_signaled(rq)) + if (!i915_request_signaled(rq)) { + struct intel_timeline *tl = get_timeline(rq); + len += snprintf(hdr + len, sizeof(hdr) - len, "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ", i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? 
tl->hwsp_offset : 0, hwsp_seqno(rq)); + + if (tl) + intel_timeline_put(tl); + } snprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); } for (port = execlists->pending; (rq = *port); port++) { + struct intel_timeline *tl = get_timeline(rq); char hdr[80]; snprintf(hdr, sizeof(hdr), "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ", (int)(port - execlists->pending), i915_ggtt_offset(rq->ring->vma), - rq->timeline->hwsp_offset, + tl ? tl->hwsp_offset : 0, hwsp_seqno(rq)); print_request(m, rq, hdr); + + if (tl) + intel_timeline_put(tl); } - spin_unlock_irqrestore(&engine->active.lock, flags); + execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); @@ -1385,6 +1458,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_lock_irqsave(&engine->active.lock, flags); rq = intel_engine_find_active_request(engine); if (rq) { + struct intel_timeline *tl = get_timeline(rq); + print_request(m, rq, "\t\tactive "); drm_printf(m, "\t\tring->start: 0x%08x\n", @@ -1397,18 +1472,27 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); - drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - rq->timeline->hwsp_offset); + + if (tl) { + drm_printf(m, "\t\tring->hwsp: 0x%08x\n", + tl->hwsp_offset); + intel_timeline_put(tl); + } print_request_ring(m, rq); + + if (rq->hw_context->lrc_reg_state) { + drm_printf(m, "Logical Ring Context:\n"); + hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE); + } } spin_unlock_irqrestore(&engine->active.lock, flags); drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); - wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); if (wakeref) { intel_engine_print_registers(engine, m); - intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref); + intel_runtime_pm_put(engine->uncore->rpm, wakeref); } else { drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } @@ -1440,8 +1524,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - spin_lock_irqsave(&engine->active.lock, flags); - write_seqlock(&engine->stats.lock); + execlists_active_lock_bh(execlists); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1469,8 +1553,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock(&engine->stats.lock); - spin_unlock_irqrestore(&engine->active.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); + execlists_active_unlock_bh(execlists); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 65b5ca74b394..67eb6183648a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -11,6 +11,7 @@ #include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_rc6.h" static int __engine_unpark(struct intel_wakeref *wf) { @@ -103,7 +104,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(rq->timeline); + intel_timeline_enter(i915_request_timeline(rq)); /* Check again on the next retirement. 
*/ engine->wakeref_serial = engine->serial + 1; @@ -123,6 +124,19 @@ out_unlock: return result; } +static void call_idle_barriers(struct intel_engine_cs *engine) +{ + struct llist_node *node, *next; + + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + struct dma_fence_cb *cb = + container_of((struct list_head *)node, + typeof(*cb), node); + + cb->func(NULL, cb); + } +} + static int __engine_park(struct intel_wakeref *wf) { struct intel_engine_cs *engine = @@ -142,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf) GEM_TRACE("%s\n", engine->name); + call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); @@ -169,7 +185,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - struct intel_runtime_pm *rpm = &engine->i915->runtime_pm; + struct intel_runtime_pm *rpm = engine->uncore->rpm; intel_wakeref_init(&engine->wakeref, rpm, &wf_ops); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 4cd54c569911..3cdbd5f8b5be 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref) return 0; } +__i915_active_call static void pool_retire(struct i915_active *ref) { struct intel_engine_pool_node *node = @@ -94,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) return ERR_PTR(-ENOMEM); node->pool = pool; - i915_active_init(engine->i915, &node->active, pool_active, pool_retire); + i915_active_init(&node->active, pool_active, pool_retire); obj = i915_gem_object_create_internal(engine->i915, sz); if (IS_ERR(obj)) { @@ -107,9 +108,19 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } +static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +{ + if (intel_engine_is_virtual(engine)) + engine = intel_virtual_engine_get_sibling(engine, 0); + + GEM_BUG_ON(!engine); + return &engine->pool; +} + struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size) +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) { + struct intel_engine_pool *pool = lookup_pool(engine); struct intel_engine_pool_node *node; struct list_head *list; unsigned long flags; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h index 8d069efd9457..1bd89cadc3b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h @@ -12,13 +12,13 @@ #include "i915_request.h" struct intel_engine_pool_node * -intel_engine_pool_get(struct intel_engine_pool *pool, size_t size); +intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); static inline int intel_engine_pool_mark_active(struct intel_engine_pool_node *node, struct i915_request *rq) { - return i915_active_ref(&node->active, rq->timeline, rq); + return i915_active_add_request(&node->active, rq); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a82cea95c2f2..3451be034caf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -148,6 +148,7 @@ enum intel_engine_id { VECS1, #define _VECS(n) (VECS0 + (n)) I915_NUM_ENGINES +#define INVALID_ENGINE ((enum intel_engine_id)-1) }; struct st_preempt_hang { @@ -303,10 
+304,12 @@ struct intel_engine_cs { u8 uabi_class; u8 uabi_instance; + u32 uabi_capabilities; u32 context_size; u32 mmio_base; - u32 uabi_capabilities; + unsigned int context_tag; +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) struct rb_node uabi_node; @@ -481,6 +484,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) unsigned int flags; /* @@ -576,20 +580,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } -#define instdone_slice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) +static inline bool +intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) +{ + return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO; +} -#define instdone_subslice_mask(dev_priv__) \ - (IS_GEN(dev_priv__, 7) ? \ - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) +#define instdone_has_slice(dev_priv___, sseu___, slice___) \ + ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ - for ((slice__) = 0, (subslice__) = 0; \ - (slice__) < I915_MAX_SLICES; \ - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \ - (slice__) += ((subslice__) == 0)) \ - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ - (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ + (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \ + intel_sseu_has_subslice(sseu__, 0, subslice__)) +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ + (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ + (slice_) += ((subslice_) == 0)) \ + for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ + (instdone_has_subslice(dev_priv_, sseu_, slice_, \ + subslice_))) #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 77cd5de83930..7f7150a733f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -160,10 +160,10 @@ static int legacy_ring_idx(const struct legacy_ring *ring) }; if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map))) - return -1; + return INVALID_ENGINE; if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max)) - return -1; + return INVALID_ENGINE; return map[ring->class].base + ring->instance; } @@ -171,23 +171,15 @@ static int legacy_ring_idx(const struct legacy_ring *ring) static void add_legacy_ring(struct legacy_ring *ring, struct intel_engine_cs *engine) { - int idx; - if (engine->gt != ring->gt || engine->class != ring->class) { ring->gt = engine->gt; ring->class = engine->class; ring->instance = 0; } - idx = legacy_ring_idx(ring); - if (unlikely(idx == -1)) - return; - - GEM_BUG_ON(idx >= ARRAY_SIZE(ring->gt->engine)); - ring->gt->engine[idx] = engine; - ring->instance++; - - engine->legacy_idx = idx; + engine->legacy_idx = legacy_ring_idx(ring); + if (engine->legacy_idx != INVALID_ENGINE) + ring->instance++; } void intel_engines_driver_register(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 86e00a2db8a4..4294f146f13c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -112,6 +112,7 @@ #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ #define MI_SEMAPHORE_POLL (1 << 15) #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) @@ -119,6 +120,8 @@ #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) +#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5) +#define MI_SEMAPHORE_TOKEN_SHIFT 5 #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ @@ -132,7 +135,10 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ +#define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) @@ -147,6 +153,7 @@ #define MI_FLUSH_DW_USE_PPGTT (0<<2) #define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) #define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ @@ -156,7 +163,8 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) +#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) +#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ /* * 3D instructions used by the kernel @@ -217,6 +225,7 @@ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) @@ -224,7 +233,9 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) @@ -235,6 +246,29 @@ #define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) #define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ +#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1) +#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2)) +/* Opcodes for MI_MATH_INSTR */ +#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0) +#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2) +#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2) +#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1) +#define 
MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1) +#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0) +#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0) +#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0) +#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0) +#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0) +#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2) +#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2) +/* Registers used as operands in MI_MATH_INSTR */ +#define MI_MATH_REG(x) (x) +#define MI_MATH_REG_SRCA 0x20 +#define MI_MATH_REG_SRCB 0x21 +#define MI_MATH_REG_ACCU 0x31 +#define MI_MATH_REG_ZF 0x32 +#define MI_MATH_REG_CF 0x33 + /* * Commands used only by the command parser */ @@ -251,7 +285,6 @@ #define MI_CLFLUSH MI_INSTR(0x27, 0) #define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) #define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) #define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) #define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) #define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d48ec9a76ed1..1c4b6c9642ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -6,7 +6,11 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_mocs.h" +#include "intel_rc6.h" #include "intel_uncore.h" +#include "intel_pm.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -20,13 +24,106 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); + intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); intel_uc_init_early(>->uc); } -void intel_gt_init_hw(struct drm_i915_private *i915) +void intel_gt_init_hw_early(struct drm_i915_private *i915) { i915->gt.ggtt = &i915->ggtt; + + /* BIOS often leaves RC6 enabled, but disable it for hw init */ + intel_gt_pm_disable(&i915->gt); +} + +static void init_unused_ring(struct intel_gt *gt, u32 base) +{ + struct intel_uncore *uncore = gt->uncore; + + intel_uncore_write(uncore, RING_CTL(base), 0); + intel_uncore_write(uncore, RING_HEAD(base), 0); + intel_uncore_write(uncore, RING_TAIL(base), 0); + intel_uncore_write(uncore, RING_START(base), 0); +} + +static void init_unused_rings(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + + if (IS_I830(i915)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + init_unused_ring(gt, SRB2_BASE); + init_unused_ring(gt, SRB3_BASE); + } else if (IS_GEN(i915, 2)) { + init_unused_ring(gt, SRB0_BASE); + init_unused_ring(gt, SRB1_BASE); + } else if (IS_GEN(i915, 3)) { + init_unused_ring(gt, PRB1_BASE); + init_unused_ring(gt, PRB2_BASE); + } +} + +int intel_gt_init_hw(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + int ret; + + BUG_ON(!i915->kernel_context); + ret = intel_gt_terminally_wedged(gt); + if (ret) + return ret; + + gt->last_init_time = ktime_get(); + + /* Double layer security blanket, see i915_gem_init() */ + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) + intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); + + if (IS_HASWELL(i915)) + intel_uncore_write(uncore, + MI_PREDICATE_RESULT_2, + IS_HSW_GT3(i915) ? + LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); + + /* Apply the GT workarounds... 
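The MI_MATH opcode table added above is enough to spell out a small GPU ALU program by hand. The sketch below encodes "GPR0 + GPR1 -> GPR2" into the four ALU dwords that would follow an MI_MATH header in a batch; the MI_MATH header itself (MI_INSTR(0x1a, n - 1)) and the MI_LOAD_REGISTER_* commands needed to seed the GPRs are deliberately left out, since MI_INSTR is defined elsewhere in this header.

#include <stdint.h>
#include <stdio.h>

/* ALU instruction layout from the new MI_MATH_INSTR() macro:
 * bits 31:20 opcode, bits 19:10 operand1, bits 9:0 operand2. */
#define MI_MATH_INSTR(op, op1, op2) \
	((uint32_t)(op) << 20 | (uint32_t)(op1) << 10 | (uint32_t)(op2))
#define MI_MATH_LOAD(op1, op2)  MI_MATH_INSTR(0x080, op1, op2)
#define MI_MATH_ADD             MI_MATH_INSTR(0x100, 0x0, 0x0)
#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
#define MI_MATH_REG(x)          (x)
#define MI_MATH_REG_SRCA        0x20
#define MI_MATH_REG_SRCB        0x21
#define MI_MATH_REG_ACCU        0x31

int main(void)
{
	/* GPR0 + GPR1 -> GPR2, expressed as the four ALU dwords. */
	uint32_t alu[] = {
		MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0)),
		MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1)),
		MI_MATH_ADD,
		MI_MATH_STORE(MI_MATH_REG(2), MI_MATH_REG_ACCU),
	};

	for (unsigned int i = 0; i < sizeof(alu) / sizeof(alu[0]); i++)
		printf("alu[%u] = 0x%08x\n", i, (unsigned int)alu[i]);
	return 0;
}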
*/ + intel_gt_apply_workarounds(gt); + /* ...and determine whether they are sticking. */ + intel_gt_verify_workarounds(gt, "init"); + + intel_gt_init_swizzling(gt); + + /* + * At least 830 can leave some of the unused rings + * "active" (ie. head != tail) after resume which + * will prevent c3 entry. Makes sure all unused rings + * are totally idle. + */ + init_unused_rings(gt); + + ret = i915_ppgtt_init_hw(gt); + if (ret) { + DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); + goto out; + } + + /* We can't enable contexts until all firmware is loaded */ + ret = intel_uc_init_hw(>->uc); + if (ret) { + i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); + goto out; + } + + intel_mocs_init(gt); + +out: + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + return ret; } static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set) @@ -89,7 +186,7 @@ intel_gt_clear_error_registers(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine_masked(engine, i915, engine_mask, id) + for_each_engine_masked(engine, gt, engine_mask, id) gen8_clear_engine_error_register(engine); } } @@ -100,7 +197,7 @@ static void gen6_check_faults(struct intel_gt *gt) enum intel_engine_id id; u32 fault; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { fault = GEN6_RING_FAULT_REG_READ(engine); if (fault & RING_FAULT_VALID) { DRM_DEBUG_DRIVER("Unexpected fault\n" @@ -176,7 +273,7 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt) void intel_gt_flush_ggtt_writes(struct intel_gt *gt) { - struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; intel_wakeref_t wakeref; /* @@ -200,18 +297,18 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) wmb(); - if (INTEL_INFO(i915)->has_coherent_ggtt) + if (INTEL_INFO(gt->i915)->has_coherent_ggtt) return; intel_gt_chipset_flush(gt); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - struct intel_uncore *uncore = gt->uncore; + with_intel_runtime_pm(uncore->rpm, wakeref) { + unsigned long flags; - spin_lock_irq(&uncore->lock); + spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); + spin_unlock_irqrestore(&uncore->lock, flags); } } @@ -222,7 +319,13 @@ void intel_gt_chipset_flush(struct intel_gt *gt) intel_gtt_chipset_flush(); } -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) +void intel_gt_driver_register(struct intel_gt *gt) +{ + if (IS_GEN(gt->i915, 5)) + intel_gpu_ips_init(gt->i915); +} + +static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) { struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; @@ -230,7 +333,7 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) int ret; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); @@ -256,11 +359,44 @@ err_unref: return ret; } -void intel_gt_fini_scratch(struct intel_gt *gt) +static void intel_gt_fini_scratch(struct intel_gt *gt) { i915_vma_unpin_and_release(>->scratch, 0); } +int intel_gt_init(struct intel_gt *gt) +{ + int err; + + err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? 
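The intel_gt_init_scratch() hunk above tracks an API change in i915_gem_object_create_stolen(): it now reports failure with an error pointer, so the fallback to an internal object must test IS_ERR() instead of NULL. Below is a small userspace sketch of the ERR_PTR convention and the try-stolen-then-internal fallback; the helpers are simplified stand-ins for <linux/err.h> and the allocators are fakes.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the kernel's <linux/err.h> helpers: small
 * negative errno values are folded into the top of the pointer range. */
#define MAX_ERRNO 4095
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Fake allocators: "stolen" fails the way a platform without stolen
 * memory would, "internal" falls back to the heap. */
static void *create_stolen(size_t size) { (void)size; return ERR_PTR(-ENODEV); }
static void *create_internal(size_t size) { return malloc(size); }

int main(void)
{
	void *obj = create_stolen(4096);

	if (IS_ERR(obj))        /* previously "if (!obj)" */
		obj = create_internal(4096);
	if (IS_ERR(obj) || !obj) {
		fprintf(stderr, "scratch allocation failed: %ld\n",
			IS_ERR(obj) ? PTR_ERR(obj) : (long)-ENOMEM);
		return 1;
	}

	printf("scratch object at %p\n", obj);
	free(obj);
	return 0;
}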
SZ_256K : SZ_4K); + if (err) + return err; + + intel_gt_pm_init(gt); + + return 0; +} + +void intel_gt_driver_remove(struct intel_gt *gt) +{ + GEM_BUG_ON(gt->awake); + intel_gt_pm_disable(gt); +} + +void intel_gt_driver_unregister(struct intel_gt *gt) +{ + intel_gpu_ips_teardown(); +} + +void intel_gt_driver_release(struct intel_gt *gt) +{ + /* Paranoia: make sure we have disabled everything before we exit. */ + intel_gt_pm_disable(gt); + intel_gt_pm_fini(gt); + + intel_gt_fini_scratch(gt); +} + void intel_gt_driver_late_release(struct intel_gt *gt) { intel_uc_driver_late_release(>->uc); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 4920cb351f10..e6ab0bff0efb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -28,7 +28,14 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc) } void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915); -void intel_gt_init_hw(struct drm_i915_private *i915); +void intel_gt_init_hw_early(struct drm_i915_private *i915); +int __must_check intel_gt_init_hw(struct intel_gt *gt); +int intel_gt_init(struct intel_gt *gt); +void intel_gt_driver_register(struct intel_gt *gt); + +void intel_gt_driver_unregister(struct intel_gt *gt); +void intel_gt_driver_remove(struct intel_gt *gt); +void intel_gt_driver_release(struct intel_gt *gt); void intel_gt_driver_late_release(struct intel_gt *gt); @@ -41,9 +48,6 @@ void intel_gt_chipset_flush(struct intel_gt *gt); void intel_gt_init_hangcheck(struct intel_gt *gt); -int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size); -void intel_gt_fini_scratch(struct intel_gt *gt); - static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt, enum intel_gt_scratch_field field) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 1363e069ec83..b866d5b1eee0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,16 +5,20 @@ */ #include "i915_drv.h" +#include "i915_globals.h" #include "i915_params.h" +#include "intel_context.h" #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_pm.h" +#include "intel_rc6.h" #include "intel_wakeref.h" -static void pm_notify(struct drm_i915_private *i915, int state) +static void pm_notify(struct intel_gt *gt, int state) { - blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); + blocking_notifier_call_chain(>->pm_notifications, state, gt->i915); } static int __gt_unpark(struct intel_wakeref *wf) @@ -24,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf) GEM_TRACE("\n"); + i915_globals_unpark(); + /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -47,21 +53,23 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); intel_gt_queue_hangcheck(gt); + intel_gt_unpark_requests(gt); - pm_notify(i915, INTEL_GT_UNPARK); + pm_notify(gt, INTEL_GT_UNPARK); return 0; } static int __gt_park(struct intel_wakeref *wf) { - struct drm_i915_private *i915 = - container_of(wf, typeof(*i915), gt.wakeref); - intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); + intel_wakeref_t wakeref = fetch_and_zero(>->awake); + struct drm_i915_private *i915 = gt->i915; GEM_TRACE("\n"); - pm_notify(i915, INTEL_GT_PARK); + pm_notify(gt, 
INTEL_GT_PARK); + intel_gt_park_requests(gt); i915_pmu_gt_parked(i915); if (INTEL_GEN(i915) >= 6) @@ -73,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + i915_globals_park(); + return 0; } @@ -84,11 +94,21 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { - intel_wakeref_init(>->wakeref, >->i915->runtime_pm, &wf_ops); + intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); BLOCKING_INIT_NOTIFIER_HEAD(>->pm_notifications); } +void intel_gt_pm_init(struct intel_gt *gt) +{ + /* + * Enabling power-management should be "self-healing". If we cannot + * enable a feature, simply leave it disabled with a notice to the + * user. + */ + intel_rc6_init(>->rc6); +} + static bool reset_engines(struct intel_gt *gt) { if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) @@ -116,11 +136,29 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force) intel_uc_sanitize(>->uc); - if (!reset_engines(gt) && !force) - return; + for_each_engine(engine, gt, id) + if (engine->reset.prepare) + engine->reset.prepare(engine); + + if (reset_engines(gt) || force) { + for_each_engine(engine, gt, id) + __intel_engine_reset(engine, false); + } + + for_each_engine(engine, gt, id) + if (engine->reset.finish) + engine->reset.finish(engine); +} - for_each_engine(engine, gt->i915, id) - __intel_engine_reset(engine, false); +void intel_gt_pm_disable(struct intel_gt *gt) +{ + if (!is_mock_gt(gt)) + intel_sanitize_gt_powersave(gt->i915); +} + +void intel_gt_pm_fini(struct intel_gt *gt) +{ + intel_rc6_fini(>->rc6); } int intel_gt_resume(struct intel_gt *gt) @@ -136,14 +174,21 @@ int intel_gt_resume(struct intel_gt *gt) * allowing us to fixup the user contexts on their first pin. */ intel_gt_pm_get(gt); - for_each_engine(engine, gt->i915, id) { + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + intel_rc6_sanitize(>->rc6); + + for_each_engine(engine, gt, id) { struct intel_context *ce; intel_engine_pm_get(engine); ce = engine->kernel_context; - if (ce) + if (ce) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_); ce->ops->reset(ce); + mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_); + } engine->serial++; /* kernel context lost */ err = engine->resume(engine); @@ -156,11 +201,38 @@ int intel_gt_resume(struct intel_gt *gt) break; } } + + intel_rc6_enable(>->rc6); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); return err; } +static void wait_for_idle(struct intel_gt *gt) +{ + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. 
+ */ + intel_gt_set_wedged(gt); + } + + intel_gt_pm_wait_for_idle(gt); +} + +void intel_gt_suspend(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + + /* We expect to be idle already; but also want to be independent */ + wait_for_idle(gt); + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + intel_rc6_disable(>->rc6); +} + void intel_gt_runtime_suspend(struct intel_gt *gt) { intel_uc_runtime_suspend(>->uc); @@ -172,3 +244,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) return intel_uc_runtime_resume(>->uc); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_gt_pm.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index fb39d99cd6ee..997770d3a968 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -43,10 +43,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) } void intel_gt_pm_init_early(struct intel_gt *gt); +void intel_gt_pm_init(struct intel_gt *gt); +void intel_gt_pm_disable(struct intel_gt *gt); +void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_sanitize(struct intel_gt *gt, bool force); + int intel_gt_resume(struct intel_gt *gt); +void intel_gt_suspend(struct intel_gt *gt); + void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +static inline bool is_mock_gt(const struct intel_gt *gt) +{ + return I915_SELFTEST_ONLY(gt->awake == -ENODEV); +} + #endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c new file mode 100644 index 000000000000..b73229a84d85 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -0,0 +1,137 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" /* for_each_engine() */ +#include "i915_request.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_timeline.h" + +static void retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; +} + +static void flush_submission(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + intel_engine_flush_submission(engine); +} + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +{ + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl, *tn; + unsigned long active_count = 0; + unsigned long flags; + bool interruptible; + LIST_HEAD(free); + + interruptible = true; + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + flush_submission(gt); /* kick the ksoftirqd tasklets */ + + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) { + active_count++; /* report busy to caller, try again? 
*/ + continue; + } + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock_irqrestore(&timelines->lock, flags); + + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + interruptible, + timeout); + dma_fence_put(fence); + } + } + + retire_requests(tl); + + spin_lock_irqsave(&timelines->lock, flags); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (--tl->active_count) + active_count += !!rcu_access_pointer(tl->last_request.fence); + else + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } + } + spin_unlock_irqrestore(&timelines->lock, flags); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); + + return active_count ? timeout : 0; +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout; +} + +static void retire_work_handler(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), requests.retire_work.work); + + intel_gt_retire_requests(gt); + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} + +void intel_gt_init_requests(struct intel_gt *gt) +{ + INIT_DELAYED_WORK(>->requests.retire_work, retire_work_handler); +} + +void intel_gt_park_requests(struct intel_gt *gt) +{ + cancel_delayed_work(>->requests.retire_work); +} + +void intel_gt_unpark_requests(struct intel_gt *gt) +{ + schedule_delayed_work(>->requests.retire_work, + round_jiffies_up_relative(HZ)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h new file mode 100644 index 000000000000..bd31cbce47e0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_REQUESTS_H +#define INTEL_GT_REQUESTS_H + +struct intel_gt; + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +static inline void intel_gt_retire_requests(struct intel_gt *gt) +{ + intel_gt_retire_requests_timeout(gt, 0); +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + +void intel_gt_init_requests(struct intel_gt *gt); +void intel_gt_park_requests(struct intel_gt *gt); +void intel_gt_unpark_requests(struct intel_gt *gt); + +#endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index dc295c196d11..ae4aaf75ac78 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,7 +17,9 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_llc_types.h" #include "intel_reset_types.h" +#include "intel_rc6_types.h" #include "intel_wakeref.h" struct drm_i915_private; @@ -49,7 +51,19 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; + struct intel_gt_requests { + /** + * We leave the user IRQ off as much as possible, + * but this means that 
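intel_gt_wait_for_idle() above is a thin loop around intel_gt_retire_requests_timeout(): keep retiring while the helper reports active timelines and budget remains, yielding and checking for signals between passes. A toy model of that loop follows, with a stub standing in for the retire helper (the real one walks gt->timelines and waits on each timeline's last fence).

#include <stdio.h>

/* Stand-in for intel_gt_retire_requests_timeout(): pretend each call
 * burns 10 ticks waiting on a busy timeline's last fence, and that
 * everything has retired after two busy passes. */
static long retire_requests_timeout(long timeout)
{
	static int busy_passes = 2;

	if (busy_passes-- <= 0)
		return 0;        /* no active timelines left: idle */
	return timeout - 10;     /* still busy: report the remaining budget */
}

int main(void)
{
	long timeout = 100;

	/* Mirrors intel_gt_wait_for_idle(); the real loop also calls
	 * cond_resched() and returns -EINTR on a pending signal. */
	while ((timeout = retire_requests_timeout(timeout)) > 0)
		;

	printf("wait_for_idle returned %ld\n", timeout);
	return 0;
}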
requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; + } requests; + struct intel_wakeref wakeref; + atomic_t user_wakeref; struct list_head closed_vma; spinlock_t closed_lock; /* guards the list of closed_vma */ @@ -66,6 +80,9 @@ struct intel_gt { */ intel_wakeref_t awake; + struct intel_llc llc; + struct intel_rc6 rc6; + struct blocking_notifier_head pm_notifications; ktime_t last_init_time; @@ -89,14 +106,16 @@ enum intel_gt_scratch_field { INTEL_GT_SCRATCH_FIELD_DEFAULT = 0, /* 8 bytes */ - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128, - - /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, + /* 6 * 8 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048, + + /* 4 bytes */ + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096, }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c index 05d042cdefe2..0fdef00af9e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -53,6 +53,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) static bool subunits_stuck(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct intel_instdone instdone; struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; bool stuck; @@ -71,7 +72,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine) stuck &= instdone_unchanged(instdone.slice_common, &accu_instdone->slice_common); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) { stuck &= instdone_unchanged(instdone.sampler[slice][subslice], &accu_instdone->sampler[slice][subslice]); stuck &= instdone_unchanged(instdone.row[slice][subslice], @@ -236,7 +237,7 @@ static void hangcheck_declare_hang(struct intel_gt *gt, hung &= ~stuck; len = scnprintf(msg, sizeof(msg), "%s on ", stuck == hung ? 
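The hangcheck change above only threads the sseu info through for_each_instdone_slice_subslice(); the surrounding subunits_stuck() logic is unchanged: it ANDs together "unchanged since the last sample" results across every INSTDONE field, so a single subunit making progress marks the engine as not stuck. A compact model of that accumulation, with an assumed body for instdone_unchanged() (only its signature appears in the hunk context):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same shape as the driver's instdone_unchanged(); the body here is an
 * assumption: remember the new value and report whether it moved. */
static bool instdone_unchanged(uint32_t cur, uint32_t *old)
{
	if (cur == *old)
		return true;
	*old = cur;
	return false;
}

int main(void)
{
	/* Accumulated snapshot from the previous hangcheck sample. */
	uint32_t accu[3] = { 0x1111, 0x2222, 0x3333 };
	/* Freshly read INSTDONE values: one unit has made progress. */
	uint32_t cur[3]  = { 0x1111, 0x2bad, 0x3333 };
	bool stuck = true;

	for (int i = 0; i < 3; i++)
		stuck &= instdone_unchanged(cur[i], &accu[i]);

	printf("subunits stuck: %s\n", stuck ? "yes" : "no");
	return 0;
}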
"no progress" : "hang"); - for_each_engine_masked(engine, gt->i915, hung, tmp) + for_each_engine_masked(engine, gt, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; @@ -270,7 +271,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (intel_gt_is_wedged(gt)) return; - wakeref = intel_runtime_pm_get_if_in_use(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(gt->uncore->rpm); if (!wakeref) return; @@ -280,10 +281,10 @@ static void hangcheck_elapsed(struct work_struct *work) */ intel_uncore_arm_unclaimed_mmio_detection(gt->uncore); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct hangcheck hc; - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); hangcheck_load_sample(engine, &hc); hangcheck_accumulate_sample(engine, &hc); @@ -302,7 +303,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (GEM_SHOW_DEBUG() && (hung | stuck)) { struct drm_printer p = drm_debug_printer("hangcheck"); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_is_idle(engine)) continue; @@ -321,7 +322,7 @@ static void hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(gt, hung, stuck); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); /* Reset timer in case GPU hangs without another request being added */ intel_gt_queue_hangcheck(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c new file mode 100644 index 000000000000..35093eb5f24e --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.c @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/cpufreq.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_llc.h" +#include "intel_sideband.h" + +struct ia_constants { + unsigned int min_gpu_freq; + unsigned int max_gpu_freq; + + unsigned int min_ring_freq; + unsigned int max_ia_freq; +}; + +static struct intel_gt *llc_to_gt(struct intel_llc *llc) +{ + return container_of(llc, struct intel_gt, llc); +} + +static unsigned int cpu_max_MHz(void) +{ + struct cpufreq_policy *policy; + unsigned int max_khz; + + policy = cpufreq_cpu_get(0); + if (policy) { + max_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_khz = tsc_khz; + } + + return max_khz / 1000; +} + +static bool get_ia_constants(struct intel_llc *llc, + struct ia_constants *consts) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct intel_rps *rps = &i915->gt_pm.rps; + + if (rps->max_freq <= rps->min_freq) + return false; + + consts->max_ia_freq = cpu_max_MHz(); + + consts->min_ring_freq = + intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); + + consts->min_gpu_freq = rps->min_freq; + consts->max_gpu_freq = rps->max_freq; + if (INTEL_GEN(i915) >= 9) { + /* Convert GT frequency to 50 HZ units */ + consts->min_gpu_freq /= GEN9_FREQ_SCALER; + consts->max_gpu_freq /= GEN9_FREQ_SCALER; + } + + return true; +} + +static void calc_ia_freq(struct intel_llc *llc, + unsigned int gpu_freq, + const struct ia_constants *consts, + unsigned int *out_ia_freq, + unsigned int *out_ring_freq) +{ + struct drm_i915_private *i915 = 
llc_to_gt(llc)->i915; + const int diff = consts->max_gpu_freq - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; + + if (INTEL_GEN(i915) >= 9) { + /* + * ring_freq = 2 * GT. ring_freq is in 100MHz units + * No floor required for ring frequency on SKL. + */ + ring_freq = gpu_freq; + } else if (INTEL_GEN(i915) >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(consts->min_ring_freq, gpu_freq); + } else if (IS_HASWELL(i915)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(consts->min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + const int min_freq = 15; + const int scale = 180; + + /* + * On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. + */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = consts->max_ia_freq - diff * scale / 2; + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } + + *out_ia_freq = ia_freq; + *out_ring_freq = ring_freq; +} + +static void gen6_update_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + unsigned int gpu_freq; + + if (!get_ia_constants(llc, &consts)) + return; + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = consts.max_gpu_freq; + gpu_freq >= consts.min_gpu_freq; + gpu_freq--) { + unsigned int ia_freq, ring_freq; + + calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq); + sandybridge_pcode_write(i915, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); + } +} + +void intel_llc_enable(struct intel_llc *llc) +{ + if (HAS_LLC(llc_to_gt(llc)->i915)) + gen6_update_ring_freq(llc); +} + +void intel_llc_disable(struct intel_llc *llc) +{ + /* Currently there is no HW configuration to be done to disable. 
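calc_ia_freq() above picks, for each GPU frequency, the ring frequency and (on pre-Haswell parts) the IA frequency ratio that gen6_update_ring_freq() then programs through the PCU mailbox. The arithmetic of the legacy branch is easy to check in isolation; the sketch below reproduces it with invented limits (the real max_ia_freq comes from cpufreq and the GPU range from the RPS state) and leaves out the sandybridge_pcode_write() step entirely.

#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

/* Illustrative constants only: max_ia_freq would come from cpufreq and
 * the GPU limits from rps->min_freq/max_freq, not from literals. */
#define MAX_IA_FREQ_MHZ 3400
#define MIN_GPU_FREQ    10
#define MAX_GPU_FREQ    22

int main(void)
{
	/* Pre-Haswell branch of calc_ia_freq(): upclock the CPU (IA) for
	 * higher GPU frequencies, since there is no separate ring clock. */
	for (int gpu_freq = MAX_GPU_FREQ; gpu_freq >= MIN_GPU_FREQ; gpu_freq--) {
		const int diff = MAX_GPU_FREQ - gpu_freq;
		const int min_freq = 15, scale = 180;
		int ia_freq;

		if (gpu_freq < min_freq)
			ia_freq = 800;
		else
			ia_freq = MAX_IA_FREQ_MHZ - diff * scale / 2;
		ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);

		printf("gpu_freq %2d -> ia_freq ratio %d\n", gpu_freq, ia_freq);
	}
	return 0;
}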
*/ +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_llc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h new file mode 100644 index 000000000000..ef09a890d2b7 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_H +#define INTEL_LLC_H + +struct intel_llc; + +void intel_llc_enable(struct intel_llc *llc); +void intel_llc_disable(struct intel_llc *llc); + +#endif /* INTEL_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h new file mode 100644 index 000000000000..ecad4687b930 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_LLC_TYPES_H +#define INTEL_LLC_TYPES_H + +struct intel_llc { +}; + +#endif /* INTEL_LLC_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d42584439f51..d0088d020220 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -230,9 +230,31 @@ static int __execlists_context_alloc(struct intel_context *ce, struct intel_engine_cs *engine); static void execlists_init_reg_state(u32 *reg_state, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring); + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close); + +static void __context_pin_acquire(struct intel_context *ce) +{ + mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_); +} + +static void __context_pin_release(struct intel_context *ce) +{ + mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_); +} + +static void mark_eio(struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + dma_fence_set_error(&rq->fence, -EIO); + i915_request_mark_complete(rq); +} static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) { @@ -330,10 +352,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * However, the priority hint is a mere hint that we may need to * preempt. If that hint is stale or we may be trying to preempt * ourselves, ignore the request. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. */ - last_prio = effective_prio(rq); - if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) return false; /* @@ -422,12 +449,8 @@ assert_priority_queue(const struct i915_request *prev, static u64 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) desc = INTEL_LEGACY_64B_CONTEXT; @@ -445,25 +468,372 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) * anything below. 
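The need_preempt() change above replaces the generic i915_scheduler_need_preempt() helper with an explicit floor: the running request's priority is clamped up to I915_PRIORITY_NORMAL - 1 so a queued request of equal priority never triggers preemption, preserving FIFO order within a priority level. A tiny model of that comparison; taking I915_PRIORITY_NORMAL as 0 is an assumption about the scheduler's default priority.

#include <stdio.h>

#define I915_PRIORITY_NORMAL 0   /* assumed default priority */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static int need_preempt(int queue_priority_hint, int running_prio)
{
	/* Clamp the running request to at least NORMAL-1 so that a hint of
	 * equal priority never preempts the task already on the engine. */
	int last_prio = MAX(running_prio, I915_PRIORITY_NORMAL - 1);

	return queue_priority_hint > last_prio;
}

int main(void)
{
	printf("hint 0 vs running 0   -> preempt? %d\n", need_preempt(0, 0));
	printf("hint 1 vs running 0   -> preempt? %d\n", need_preempt(1, 0));
	printf("hint 0 vs running -10 -> preempt? %d\n", need_preempt(0, -10));
	return 0;
}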
*/ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } return desc; } +static u32 *set_offsets(u32 *regs, + const u8 *data, + const struct intel_engine_cs *engine) +#define NOP(x) (BIT(7) | (x)) +#define LRI(count, flags) ((flags) << 6 | (count)) +#define POSTED BIT(0) +#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) +#define REG16(x) \ + (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ + (((x) >> 2) & 0x7f) +#define END() 0 +{ + const u32 base = engine->mmio_base; + + while (*data) { + u8 count, flags; + + if (*data & BIT(7)) { /* skip */ + regs += *data++ & ~BIT(7); + continue; + } + + count = *data & 0x3f; + flags = *data >> 6; + data++; + + *regs = MI_LOAD_REGISTER_IMM(count); + if (flags & POSTED) + *regs |= MI_LRI_FORCE_POSTED; + if (INTEL_GEN(engine->i915) >= 11) + *regs |= MI_LRI_CS_MMIO; + regs++; + + GEM_BUG_ON(!count); + do { + u32 offset = 0; + u8 v; + + do { + v = *data++; + offset <<= 7; + offset |= v & ~BIT(7); + } while (v & BIT(7)); + + *regs = base + (offset << 2); + regs += 2; + } while (--count); + } + + return regs; +} + +static const u8 gen8_xcs_offsets[] = { + NOP(1), + LRI(11, 0), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + + NOP(9), + LRI(9, 0), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(2, 0), + REG16(0x200), + REG(0x028), + + END(), +}; + +static const u8 gen9_xcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, POSTED), + REG16(0x200), + + NOP(13), + LRI(44, POSTED), + REG(0x028), + REG(0x09c), + REG(0x0c0), + REG(0x178), + REG(0x17c), + REG16(0x358), + REG(0x170), + REG(0x150), + REG(0x154), + REG(0x158), + REG16(0x41c), + REG16(0x600), + REG16(0x604), + REG16(0x608), + REG16(0x60c), + REG16(0x610), + REG16(0x614), + REG16(0x618), + REG16(0x61c), + REG16(0x620), + REG16(0x624), + REG16(0x628), + REG16(0x62c), + REG16(0x630), + REG16(0x634), + REG16(0x638), + REG16(0x63c), + REG16(0x640), + REG16(0x644), + REG16(0x648), + REG16(0x64c), + REG16(0x650), + REG16(0x654), + REG16(0x658), + REG16(0x65c), + REG16(0x660), + REG16(0x664), + REG16(0x668), + REG16(0x66c), + REG16(0x670), + REG16(0x674), + REG16(0x678), + REG16(0x67c), + REG(0x068), + + END(), +}; + +static const u8 gen12_xcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + 
REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END(), +}; + +static const u8 gen8_rcs_offsets[] = { + NOP(1), + LRI(14, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + + NOP(3), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + NOP(13), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen11_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x11c), + REG(0x114), + REG(0x118), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(1, POSTED), + REG(0x1b0), + + NOP(10), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +static const u8 gen12_rcs_offsets[] = { + NOP(1), + LRI(13, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + + NOP(5), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END(), +}; + +#undef END +#undef REG16 +#undef REG +#undef LRI +#undef NOP + +static const u8 *reg_offsets(const struct intel_engine_cs *engine) +{ + /* + * The gen12+ lists only have the registers we program in the basic + * default state. We rely on the context image using relative + * addressing to automatic fixup the register state between the + * physical engines for virtual engine. + */ + GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && + !intel_engine_has_relative_mmio(engine)); + + if (engine->class == RENDER_CLASS) { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_rcs_offsets; + else if (INTEL_GEN(engine->i915) >= 11) + return gen11_rcs_offsets; + else + return gen8_rcs_offsets; + } else { + if (INTEL_GEN(engine->i915) >= 12) + return gen12_xcs_offsets; + else if (INTEL_GEN(engine->i915) >= 9) + return gen9_xcs_offsets; + else + return gen8_xcs_offsets; + } +} + static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -482,7 +852,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_for_each_entry_safe_reverse(rq, rn, &engine->active.requests, sched.link) { - struct intel_engine_cs *owner; if (i915_request_completed(rq)) continue; /* XXX */ @@ -497,8 +866,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) * engine so that it can be moved across onto another physical * engine as load dictates. 
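The register tables above (gen8_xcs_offsets and friends) are not raw dwords but a compact byte code that set_offsets() expands into the context image: NOP(x) skips dwords, LRI(count, flags) emits an MI_LOAD_REGISTER_IMM header, and each register offset is a 7-bits-per-byte varint (REG for offsets below 0x200, REG16 for larger ones) that is rebased onto engine->mmio_base. The standalone decoder below replays that scheme on a two-register table; the 0x2000 base and the chosen registers are only examples.

#include <stdint.h>
#include <stdio.h>

/* Same byte code the patch introduces for set_offsets(). */
#define NOP(x)            (0x80 | (x))
#define LRI(count, flags) ((flags) << 6 | (count))
#define POSTED            0x1
#define REG(x)            ((x) >> 2)
#define REG16(x)          (((x) >> 9) | 0x80), (((x) >> 2) & 0x7f)
#define END()             0

static const uint8_t table[] = {
	NOP(1),                 /* skip one dword of the context image */
	LRI(2, POSTED),         /* MI_LOAD_REGISTER_IMM of two registers */
	REG16(0x244),           /* e.g. RING_CONTEXT_CONTROL, above 0x200 */
	REG(0x034),             /* e.g. RING_HEAD */
	END(),
};

int main(void)
{
	const uint32_t base = 0x2000;   /* e.g. the render ring mmio base */
	const uint8_t *data = table;
	unsigned int slot = 0;

	while (*data) {
		if (*data & 0x80) {             /* NOP: skip dwords */
			slot += *data++ & 0x7f;
			continue;
		}

		uint8_t count = *data & 0x3f;
		uint8_t flags = *data >> 6;
		data++;

		printf("slot %2u: MI_LOAD_REGISTER_IMM(%u)%s\n",
		       slot++, count, flags & POSTED ? " | FORCE_POSTED" : "");

		while (count--) {
			uint32_t offset = 0;
			uint8_t v;

			do {                    /* 7-bit varint, MSB = continue */
				v = *data++;
				offset = offset << 7 | (v & 0x7f);
			} while (v & 0x80);

			printf("slot %2u: register %#x\n", slot, base + (offset << 2));
			slot += 2;              /* offset dword + value dword */
		}
	}
	return 0;
}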
*/ - owner = rq->hw_context->engine; - if (likely(owner == engine)) { + if (likely(rq->execution_mask == engine->mask)) { GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); @@ -509,6 +877,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) list_move(&rq->sched.link, pl); active = rq; } else { + struct intel_engine_cs *owner = rq->hw_context->engine; + /* * Decouple the virtual breadcrumb before moving it * back to the virtual engine -- we don't want the @@ -518,7 +888,8 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { - spin_lock(&rq->lock); + spin_lock_nested(&rq->lock, + SINGLE_DEPTH_NESTING); i915_request_cancel_breadcrumb(rq); spin_unlock(&rq->lock); } @@ -562,6 +933,18 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + ce->lrc_desc |= (u64)ce->tag << 32; + } else { + /* We don't need a strict matching tag, just different values */ + ce->lrc_desc &= ~GENMASK_ULL(47, 37); + ce->lrc_desc |= + (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + GEN11_SW_CTX_ID_SHIFT; + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + } + intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -631,7 +1014,6 @@ execlists_schedule_out(struct i915_request *rq) struct intel_engine_cs *cur, *old; trace_i915_request_out(rq); - GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); old = READ_ONCE(ce->inflight); do @@ -648,7 +1030,7 @@ static u64 execlists_update_context(const struct i915_request *rq) struct intel_context *ce = rq->hw_context; u64 desc; - ce->lrc_reg_state[CTX_RING_TAIL + 1] = + ce->lrc_reg_state[CTX_RING_TAIL] = intel_ring_set_tail(rq->ring, rq->tail); /* @@ -671,6 +1053,10 @@ static u64 execlists_update_context(const struct i915_request *rq) desc = ce->lrc_desc; ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + /* Wa_1607138340:tgl */ + if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0)) + desc |= CTX_DESC_FORCE_RESTORE; + return desc; } @@ -693,6 +1079,9 @@ trace_ports(const struct intel_engine_execlists *execlists, const struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + if (!ports[0]) + return; + GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n", engine->name, msg, ports[0]->fence.context, @@ -713,25 +1102,45 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); - if (!execlists->pending[0]) + if (!execlists->pending[0]) { + GEM_TRACE_ERR("Nothing pending for promotion!\n"); return false; + } - if (execlists->pending[execlists_num_ports(execlists)]) + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("Excess pending[%d] for promotion!\n", + execlists_num_ports(execlists)); return false; + } for (port = execlists->pending; (rq = *port); port++) { - if (ce == rq->hw_context) + if (ce == rq->hw_context) { + GEM_TRACE_ERR("Duplicate context in pending[%zd]\n", + port - execlists->pending); return false; + } ce = rq->hw_context; if (i915_request_completed(rq)) continue; - if (i915_active_is_idle(&ce->active)) + if (i915_active_is_idle(&ce->active)) { + GEM_TRACE_ERR("Inactive context in pending[%zd]\n", + port - execlists->pending); return false; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("Unpinned context in pending[%zd]\n", + port - 
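__execlists_schedule_in() above stops deriving the descriptor's software context id from the GEM context's hw_id and instead stamps a small per-engine rolling tag into bits 47:37 on every submission, with a fixed ce->tag reserved for OA; the values only need to differ between contexts in flight at the same time. A sketch of the packing follows; NUM_CONTEXT_TAG == 4 assumes EXECLIST_MAX_PORTS is 2, per the roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) definition earlier in the patch.

#include <stdint.h>
#include <stdio.h>

#define GEN11_SW_CTX_ID_SHIFT 37      /* sw context id: descriptor bits 47:37 */
#define GENMASK_ULL(h, l) ((~0ULL << (l)) & (~0ULL >> (63 - (h))))
#define NUM_CONTEXT_TAG 4             /* roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) */

int main(void)
{
	uint64_t lrc_desc = 0;            /* other descriptor fields omitted */
	unsigned int context_tag = 0;     /* per-engine rolling counter */

	for (int i = 0; i < 6; i++) {
		/* Same update as __execlists_schedule_in(): clear the old id,
		 * then stamp the next tag; strict matching is not required,
		 * merely distinct values for contexts in flight together. */
		lrc_desc &= ~GENMASK_ULL(47, 37);
		lrc_desc |= (uint64_t)(context_tag++ % NUM_CONTEXT_TAG)
			    << GEN11_SW_CTX_ID_SHIFT;

		printf("submission %d: sw ctx id = %llu\n", i,
		       (unsigned long long)((lrc_desc >> GEN11_SW_CTX_ID_SHIFT) & 0x7ff));
	}
	return 0;
}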
execlists->pending); + return false; + } - if (!i915_vma_is_pinned(ce->state)) + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("Unpinned ringbuffer in pending[%zd]\n", + port - execlists->pending); return false; + } } return ce; @@ -797,6 +1206,21 @@ static bool can_merge_rq(const struct i915_request *prev, GEM_BUG_ON(prev == next); GEM_BUG_ON(!assert_priority_queue(prev, next)); + /* + * We do not submit known completed requests. Therefore if the next + * request is already completed, we can pretend to merge it in + * with the previous context (and we will skip updating the ELSP + * and tracking). Thus hopefully keeping the ELSP full with active + * contexts, despite the best efforts of preempt-to-busy to confuse + * us. + */ + if (i915_request_completed(next)) + return true; + + if (unlikely((prev->flags ^ next->flags) & + (I915_REQUEST_NOPREEMPT | I915_REQUEST_SENTINEL))) + return false; + if (!can_merge_ctx(prev->hw_context, next->hw_context)) return false; @@ -806,47 +1230,7 @@ static bool can_merge_rq(const struct i915_request *prev, static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { - u32 base = engine->mmio_base; - - /* Must match execlists_init_reg_state()! */ - - regs[CTX_CONTEXT_CONTROL] = - i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); - regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); - regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); - regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); - regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); - - regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); - regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); - regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); - regs[CTX_SECOND_BB_HEAD_U] = - i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); - regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); - regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); - - regs[CTX_CTX_TIMESTAMP] = - i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); - regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); - regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); - regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); - regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); - regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); - regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); - regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); - regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); - - if (engine->class == RENDER_CLASS) { - regs[CTX_RCS_INDIRECT_CTX] = - i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); - regs[CTX_RCS_INDIRECT_CTX_OFFSET] = - i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); - regs[CTX_BB_PER_CTX_PTR] = - i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); - - regs[CTX_R_PWR_CLK_STATE] = - i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); - } + set_offsets(regs, reg_offsets(engine), engine); } static bool virtual_matches(const struct virtual_engine *ve, @@ -893,7 +1277,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, static struct i915_request * last_active(const struct intel_engine_execlists *execlists) { - struct i915_request * const *last = execlists->active; + struct i915_request * const *last = READ_ONCE(execlists->active); while (*last && i915_request_completed(*last)) last++; @@ -1094,7 +1478,7 @@ 
static void execlists_dequeue(struct intel_engine_cs *engine) last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && - !timer_pending(&engine->execlists.timer)) { + timer_expired(&engine->execlists.timer)) { GEM_TRACE("%s: expired last=%llx:%lld, prio=%d, hint=%d\n", engine->name, last->fence.context, @@ -1130,8 +1514,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * submission. */ if (!list_is_last(&last->sched.link, - &engine->active.requests)) + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ + if (!execlists->timer.expires && + need_timeslice(engine, last)) + mod_timer(&execlists->timer, + jiffies + 1); return; + } /* * WaIdleLiteRestore:bdw,skl @@ -1172,21 +1565,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) continue; } - if (i915_request_completed(rq)) { - ve->request = NULL; - ve->base.execlists.queue_priority_hint = INT_MIN; - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - rq->engine = engine; - __i915_request_submit(rq); - - spin_unlock(&ve->base.active.lock); - - rb = rb_first_cached(&execlists->virtual); - continue; - } - if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); return; /* leave this for another */ @@ -1214,7 +1592,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) unsigned int n; GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - virtual_update_register_offsets(regs, engine); + + if (!intel_engine_has_relative_mmio(engine)) + virtual_update_register_offsets(regs, + engine); if (!list_empty(&ve->context.signals)) virtual_xfer_breadcrumbs(ve, engine); @@ -1237,11 +1618,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(ve->siblings[0] != engine); } - __i915_request_submit(rq); - if (!i915_request_completed(rq)) { + if (__i915_request_submit(rq)) { submit = true; last = rq; } + i915_request_put(rq); + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). + */ + if (!submit) { + spin_unlock(&ve->base.active.lock); + rb = rb_first_cached(&execlists->virtual); + continue; + } } spin_unlock(&ve->base.active.lock); @@ -1254,8 +1648,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - if (i915_request_completed(rq)) - goto skip; + bool merge = true; /* * Can we combine this request with the current port? @@ -1285,6 +1678,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last->hw_context == rq->hw_context) goto done; + if (i915_request_has_sentinel(last)) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. 
Note that we @@ -1296,14 +1692,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) ctx_single_port_submission(rq->hw_context)) goto done; - *port = execlists_schedule_in(last, port - execlists->pending); - port++; + merge = false; } - last = rq; - submit = true; -skip: - __i915_request_submit(rq); + if (__i915_request_submit(rq)) { + if (!merge) { + *port = execlists_schedule_in(last, port - execlists->pending); + port++; + last = NULL; + } + + GEM_BUG_ON(last && + !can_merge_ctx(last->hw_context, + rq->hw_context)); + + submit = true; + last = rq; + } } rb_erase_cached(&p->node, &execlists->queue); @@ -1334,11 +1739,26 @@ done: if (submit) { *port = execlists_schedule_in(last, port - execlists->pending); - memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists->switch_priority_hint = switch_prio(engine, *execlists->pending); + + /* + * Skip if we ended up with exactly the same set of requests, + * e.g. trying to timeslice a pair of ordered contexts + */ + if (!memcmp(execlists->active, execlists->pending, + (port - execlists->pending + 1) * sizeof(*port))) { + do + execlists_schedule_out(fetch_and_zero(port)); + while (port-- != execlists->pending); + + goto skip_submit; + } + + memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); } else { +skip_submit: ring_set_paused(engine, 0); } } @@ -1371,13 +1791,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists) return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); } -enum csb_step { - CSB_NOP, - CSB_PROMOTE, - CSB_PREEMPT, - CSB_COMPLETE, -}; - /* * Starting with Gen12, the status has a new format: * @@ -1404,7 +1817,7 @@ enum csb_step { * bits 47-57: sw context id of the lrc the GT switched away from * bits 58-63: sw counter of the lrc the GT switched away from */ -static inline enum csb_step +static inline bool gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { u32 lower_dw = csb[0]; @@ -1413,9 +1826,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw); bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; - if (!ctx_away_valid && ctx_to_valid) - return CSB_PROMOTE; - /* * The context switch detail is not guaranteed to be 5 when a preemption * occurs, so we can't just check for that. The check below works for @@ -1423,8 +1833,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * instructions and lite-restore. Preempt-to-idle via the CTRL register * would require some extra handling, but we don't support that. */ - if (new_queue && ctx_away_valid) - return CSB_PREEMPT; + if (!ctx_away_valid || new_queue) { + GEM_BUG_ON(!ctx_to_valid); + return true; + } /* * switch detail = 5 is covered by the case above and we do not expect a @@ -1432,30 +1844,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) * use polling mode. 
*/ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw)); - - if (*execlists->active) { - GEM_BUG_ON(!ctx_away_valid); - return CSB_COMPLETE; - } - - return CSB_NOP; + return false; } -static inline enum csb_step +static inline bool gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb) { - unsigned int status = *csb; - - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) - return CSB_PROMOTE; - - if (status & GEN8_CTX_STATUS_PREEMPTED) - return CSB_PREEMPT; - - if (*execlists->active) - return CSB_COMPLETE; - - return CSB_NOP; + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); } static void process_csb(struct intel_engine_cs *engine) @@ -1465,6 +1860,13 @@ static void process_csb(struct intel_engine_cs *engine) const u8 num_entries = execlists->csb_size; u8 head, tail; + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915)); /* @@ -1494,7 +1896,7 @@ static void process_csb(struct intel_engine_cs *engine) rmb(); do { - enum csb_step csb_step; + bool promote; if (++head == num_entries) head = 0; @@ -1522,20 +1924,19 @@ static void process_csb(struct intel_engine_cs *engine) buf[2 * head + 0], buf[2 * head + 1]); if (INTEL_GEN(engine->i915) >= 12) - csb_step = gen12_csb_parse(execlists, buf + 2 * head); + promote = gen12_csb_parse(execlists, buf + 2 * head); else - csb_step = gen8_csb_parse(execlists, buf + 2 * head); + promote = gen8_csb_parse(execlists, buf + 2 * head); + if (promote) { + if (!inject_preempt_hang(execlists)) + ring_set_paused(engine, 0); - switch (csb_step) { - case CSB_PREEMPT: /* cancel old inflight, prepare for switch */ + /* cancel old inflight, prepare for switch */ trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) execlists_schedule_out(*execlists->active++); - /* fallthrough */ - case CSB_PROMOTE: /* switch pending to inflight */ - GEM_BUG_ON(*execlists->active); + /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); execlists->active = memcpy(execlists->inflight, @@ -1545,14 +1946,14 @@ static void process_csb(struct intel_engine_cs *engine) if (enable_timeslice(execlists)) mod_timer(&execlists->timer, jiffies + 1); - - if (!inject_preempt_hang(execlists)) - ring_set_paused(engine, 0); + else + cancel_timer(&execlists->timer); WRITE_ONCE(execlists->pending[0], NULL); - break; + } else { + GEM_BUG_ON(!*execlists->active); - case CSB_COMPLETE: /* port0 completed, advanced to port1 */ + /* port0 completed, advanced to port1 */ trace_ports(execlists, "completed", execlists->active); /* @@ -1567,10 +1968,6 @@ static void process_csb(struct intel_engine_cs *engine) GEM_BUG_ON(execlists->active - execlists->inflight > execlists_num_ports(execlists)); - break; - - case CSB_NOP: - break; } } while (head != tail); @@ -1593,8 +1990,11 @@ static void process_csb(struct intel_engine_cs *engine) static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { lockdep_assert_held(&engine->active.lock); - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { + rcu_read_lock(); /* protect peeking at execlists->active */ execlists_dequeue(engine); + rcu_read_unlock(); + } } /* @@ -1726,14 +2126,13 @@ static void execlists_context_unpin(struct 
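With the csb_step enum gone, process_csb() above only needs a boolean from the CSB parsers: "promote pending[] into active[]" versus "a port completed". For gen8 that collapses to a single bit test on the status dword. The sketch below mirrors gen8_csb_parse(); the GEN8_CTX_STATUS_* bit positions are quoted from the driver headers from memory and should be verified against intel_lrc.h rather than trusted from this sketch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GEN8_CTX_STATUS_IDLE_ACTIVE  (1u << 0)
#define GEN8_CTX_STATUS_PREEMPTED    (1u << 1)
#define GEN8_CTX_STATUS_ACTIVE_IDLE  (1u << 3)
#define GEN8_CTX_STATUS_COMPLETE     (1u << 4)

static bool gen8_csb_parse(uint32_t csb)
{
	/* Both idle->active and preemption events promote execlists->pending
	 * into execlists->active; anything else is a completion event. */
	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
}

int main(void)
{
	const uint32_t events[] = {
		GEN8_CTX_STATUS_IDLE_ACTIVE,
		GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_ACTIVE_IDLE,
		GEN8_CTX_STATUS_PREEMPTED,
	};

	for (unsigned int i = 0; i < sizeof(events) / sizeof(events[0]); i++)
		printf("csb %#06x -> %s\n", (unsigned int)events[i],
		       gen8_csb_parse(events[i]) ?
		       "promote pending[] to active[]" :
		       "port0 completed, advance to port1");
	return 0;
}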
intel_context *ce) check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, ce->engine); - i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); intel_ring_reset(ce->ring, ce->ring->tail); } static void -__execlists_update_reg_state(struct intel_context *ce, - struct intel_engine_cs *engine) +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -1741,16 +2140,16 @@ __execlists_update_reg_state(struct intel_context *ce, GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); - regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma); - regs[CTX_RING_HEAD + 1] = ring->head; - regs[CTX_RING_TAIL + 1] = ring->tail; + regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma); + regs[CTX_RING_HEAD] = ring->head; + regs[CTX_RING_TAIL] = ring->tail; /* RPCS */ if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE + 1] = + regs[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(engine->i915, &ce->sseu); - i915_oa_init_reg_state(engine, ce, regs); + i915_oa_init_reg_state(ce, engine); } } @@ -1776,18 +2175,12 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_map; - ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); unpin_active: intel_context_active_release(ce); err: @@ -1843,7 +2236,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; - GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb); + GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) @@ -1859,7 +2252,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_timeline(rq)->hwsp_offset; *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -1871,60 +2264,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm); - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Magic required to prevent forcewake errors! 
*/ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - /* Be doubly sure the LRI have landed before proceeding */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; - - /* Re-invalidate the TLB for luck */ - return engine->emit_flush(rq, EMIT_INVALIDATE); -} - static int execlists_request_alloc(struct i915_request *request) { int ret; @@ -1947,10 +2286,7 @@ static int execlists_request_alloc(struct i915_request *request) */ /* Unconditionally invalidate GPU caches and TLBs. */ - if (i915_vm_is_4lvl(request->hw_context->vm)) - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - else - ret = emit_pdps(request); + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; @@ -2002,12 +2338,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) return batch; } -static u32 slm_offset(struct intel_engine_cs *engine) -{ - return intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA); -} - /* * Typically we only have one indirect_ctx and per_ctx batch buffer which are * initialized at the beginning and shared across all contexts but this field @@ -2036,10 +2366,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* Actual scratch location is at 128 bytes offset */ batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE, - slm_offset(engine)); + LRC_PPHWSP_SCRATCH_ADDR); *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2399,10 +2729,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) static struct i915_request *active_request(struct i915_request *rq) { - const struct list_head * const list = &rq->timeline->requests; const struct intel_context * const ce = rq->hw_context; struct i915_request *active = NULL; + struct list_head *list; + + if (!i915_request_is_active(rq)) /* unwound, but incomplete! 
*/ + return rq; + list = &i915_request_active_timeline(rq)->requests; list_for_each_entry_from_reverse(rq, list, link) { if (i915_request_completed(rq)) break; @@ -2416,6 +2750,17 @@ static struct i915_request *active_request(struct i915_request *rq) return active; } +static void __execlists_reset_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (INTEL_GEN(engine->i915) >= 9) { + regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING; + regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16; + } +} + static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2423,6 +2768,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) struct i915_request *rq; u32 *regs; + mb(); /* paranoia: read the CSB pointers from after the reset */ + clflush(execlists->csb_write); + mb(); + process_csb(engine); /* drain preemption events */ /* Following the reset, we need to reload the CSB read/write pointers */ @@ -2437,15 +2786,24 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!rq) goto unwind; + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + ce = rq->hw_context; - GEM_BUG_ON(i915_active_is_idle(&ce->active)); GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); + + /* Proclaim we have exclusive access to the context image! */ + __context_pin_acquire(ce); + rq = active_request(rq); if (!rq) { + /* Idle context; tidy up the ring so we can restart afresh */ ce->ring->head = ce->ring->tail; goto out_replay; } + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); ce->ring->head = intel_ring_wrap(ce->ring, rq->head); /* @@ -2486,19 +2844,23 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ + GEM_BUG_ON(!intel_context_is_pinned(ce)); regs = ce->lrc_reg_state; if (engine->pinned_default_state) { memcpy(regs, /* skip restoring the vanilla PPHWSP */ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); out_replay: GEM_TRACE("%s replay {head:%04x, tail:%04x\n", engine->name, ce->ring->head, ce->ring->tail); intel_ring_update_space(ce->ring); + __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine); + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + __context_pin_release(ce); unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -2552,12 +2914,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) __execlists_reset(engine, true); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (!i915_request_signaled(rq)) - dma_fence_set_error(&rq->fence, -EIO); - - i915_request_mark_complete(rq); - } + list_for_each_entry(rq, &engine->active.requests, sched.link) + mark_eio(rq); /* Flush the queued requests to the timeline list (for retiring). 
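For reference, a minimal sketch of what the new mark_eio() helper is assumed to do, inferred from the open-coded pattern these cancel_requests hunks replace (leave already-signalled fences alone, flag the rest with -EIO, then mark the request complete); its actual definition is introduced elsewhere in this patch:

	static void mark_eio(struct i915_request *rq)
	{
		/* mirrors the removed open-coded cancellation path */
		if (!i915_request_signaled(rq))
			dma_fence_set_error(&rq->fence, -EIO);

		i915_request_mark_complete(rq);
	}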
*/ while ((rb = rb_first_cached(&execlists->queue))) { @@ -2565,10 +2923,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { - list_del_init(&rq->sched.link); + mark_eio(rq); __i915_request_submit(rq); - dma_fence_set_error(&rq->fence, -EIO); - i915_request_mark_complete(rq); } rb_erase_cached(&p->node, &execlists->queue); @@ -2584,13 +2940,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) RB_CLEAR_NODE(rb); spin_lock(&ve->base.active.lock); - if (ve->request) { - ve->request->engine = engine; - __i915_request_submit(ve->request); - dma_fence_set_error(&ve->request->fence, -EIO); - i915_request_mark_complete(ve->request); + rq = fetch_and_zero(&ve->request); + if (rq) { + mark_eio(rq); + + rq->engine = engine; + __i915_request_submit(rq); + i915_request_put(rq); + ve->base.execlists.queue_priority_hint = INT_MIN; - ve->request = NULL; } spin_unlock(&ve->base.active.lock); } @@ -2723,7 +3081,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) } *cs++ = cmd; - *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ intel_ring_advance(request, cs); @@ -2734,10 +3092,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode) static int gen8_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); bool vf_flush_wa = false, dc_flush_wa = false; u32 *cs, flags = 0; int len; @@ -2759,7 +3113,7 @@ static int gen8_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; /* * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL @@ -2792,7 +3146,7 @@ static int gen8_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, 0); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); if (dc_flush_wa) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); @@ -2805,11 +3159,6 @@ static int gen8_emit_flush_render(struct i915_request *request, static int gen11_emit_flush_render(struct i915_request *request, u32 mode) { - struct intel_engine_cs *engine = request->engine; - const u32 scratch_addr = - intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); - if (mode & EMIT_FLUSH) { u32 *cs; u32 flags = 0; @@ -2822,13 +3171,13 @@ static int gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; cs = intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -2846,19 +3195,111 @@ static int gen11_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; cs = 
intel_ring_begin(request, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } return 0; } +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | 1 << 8 | state; +} + +static int gen12_emit_flush_render(struct i915_request *request, + u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(request, 8); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Prevent the pre-parser from skipping past the TLB + * invalidate and loading a stale page for the batch + * buffer / request payload. 
+ */ + *cs++ = preparser_disable(true); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + + *cs++ = preparser_disable(false); + intel_ring_advance(request, cs); + + /* + * Wa_1604544889:tgl + */ + if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { + flags = 0; + flags |= PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + cs = intel_ring_begin(request, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, + LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(request, cs); + } + } + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -2907,7 +3348,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, 0); return gen8_emit_fini_breadcrumb_footer(request, cs); @@ -2915,28 +3356,28 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - request->timeline->hwsp_offset, - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE); - - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL, + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, 0); + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); + return gen8_emit_fini_breadcrumb_footer(request, cs); } -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, - u32 *cs) +static u32 * +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, - request->timeline->hwsp_offset, + i915_request_active_timeline(request)->hwsp_offset, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | @@ -2947,9 +3388,87 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, return gen8_emit_fini_breadcrumb_footer(request, cs); } +/* + * Note that the CS instruction pre-parser will not stall on the breadcrumb + * flush and will continue pre-fetching the instructions after it before the + * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at + * BB_START/END instructions, so, even though we might pre-fetch the pre-amble + * of the next request before the memory has been flushed, we're guaranteed that + * we won't access the batch itself too early. + * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, + * so, if the current request is modifying an instruction in the next request on + * the same intel_context, we might pre-fetch and then execute the pre-update + * instruction. To avoid this, the users of self-modifying code should either + * disable the parser around the code emitting the memory writes, via a new flag + * added to MI_ARB_CHECK, or emit the writes from a different intel_context. 
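As a rough illustration of the first option mentioned above (an assumption for illustration, not code taken from this patch), an emitter of self-modifying batches could bracket its stores with the preparser_disable() helper introduced in this hunk:

	*cs++ = preparser_disable(true);
	/* emit the stores that patch instructions in the following batch */
	*cs++ = preparser_disable(false);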
For + * the in-kernel use-cases we've opted to use a separate context, see + * reloc_gpu() as an example. + * All the above applies only to the instructions themselves. Non-inline data + * used by the instructions is not pre-fetched. + */ + +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = intel_hws_preempt_address(request->engine); + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + + return cs; +} + +static __always_inline u32* +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +{ + *cs++ = MI_USER_INTERRUPT; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(request->engine)) + cs = gen12_emit_preempt_busywait(request, cs); + + request->tail = intel_ring_offset(request, cs); + assert_ring_tail_valid(request->ring, request->tail); + + return gen8_emit_wa_tail(request, cs); +} + +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + 0); + + return gen12_emit_fini_breadcrumb_footer(request, cs); +} + +static u32 * +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_HDC_PIPELINE_FLUSH); + + return gen12_emit_fini_breadcrumb_footer(request, cs); +} + static void execlists_park(struct intel_engine_cs *engine) { - del_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.timer); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -2972,6 +3491,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } + + if (INTEL_GEN(engine->i915) >= 12) + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; } static void execlists_destroy(struct intel_engine_cs *engine) @@ -2999,6 +3521,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; + if (INTEL_GEN(engine->i915) >= 12) + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; engine->set_default_submission = intel_execlists_set_default_submission; @@ -3044,6 +3568,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { case 12: + engine->emit_flush = gen12_emit_flush_render; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; case 11: engine->emit_flush = gen11_emit_flush_render; engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; @@ -3116,7 +3643,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) return 0; } -static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) +static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -3149,86 +3676,50 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) 
return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *regs, - struct intel_context *ce, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm); - bool rcs = engine->class == RENDER_CLASS; - u32 base = engine->mmio_base; - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). - */ - regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | - MI_LRI_FORCE_POSTED; - - CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base), +static void init_common_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + const struct intel_ring *ring) +{ + regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)); - if (INTEL_GEN(engine->i915) < 11) { - regs[CTX_CONTEXT_CONTROL + 1] |= + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + if (INTEL_GEN(engine->i915) < 11) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); - } - CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); - CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); - CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), - RING_CTL_SIZE(ring->size) | RING_VALID); - CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); - CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); - CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); - CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); - CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); - CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); - if (rcs) { - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - - CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); - CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(base), 0); - if (wa_ctx->indirect_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - regs[CTX_RCS_INDIRECT_CTX + 1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset) | - (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - - regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = - intel_lr_indirect_ctx_offset(engine) << 6; - } - CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); - if (wa_ctx->per_ctx.size) { - u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; + regs[CTX_BB_STATE] = RING_BB_PPGTT; +} - regs[CTX_BB_PER_CTX_PTR + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } +static void init_wa_bb_reg_state(u32 * const regs, + const struct intel_engine_cs *engine, + u32 pos_bb_per_ctx) +{ + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; + + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); + + regs[pos_bb_per_ctx] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } - regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + if (wa_ctx->indirect_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - CTX_REG(regs, CTX_CTX_TIMESTAMP, 
RING_CTX_TIMESTAMP(base), 0); - /* PDP values well be assigned later if needed */ - CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0); - CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0); - CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0); - CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0); - CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0); - CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0); - CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0); - CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0); + regs[pos_bb_per_ctx + 2] = + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); + regs[pos_bb_per_ctx + 4] = + intel_lr_indirect_ctx_offset(engine) << 6; + } +} + +static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) +{ if (i915_vm_is_4lvl(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and @@ -3241,15 +3732,47 @@ static void execlists_init_reg_state(u32 *regs, ASSIGN_CTX_PDP(ppgtt, regs, 1); ASSIGN_CTX_PDP(ppgtt, regs, 0); } +} + +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); +} - if (rcs) { - regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0); +static void execlists_init_reg_state(u32 *regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + const struct intel_ring *ring, + bool close) +{ + /* + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). + */ + u32 *bbe = set_offsets(regs, reg_offsets(engine), engine); + + if (close) { /* Close the batch; used mainly by live_lrc_layout() */ + *bbe = MI_BATCH_BUFFER_END; + if (INTEL_GEN(engine->i915) >= 10) + *bbe |= BIT(0); } - regs[CTX_END] = MI_BATCH_BUFFER_END; - if (INTEL_GEN(engine->i915) >= 10) - regs[CTX_END] |= BIT(0); + init_common_reg_state(regs, engine, ring); + init_ppgtt_reg_state(regs, vm_alias(ce->vm)); + + init_wa_bb_reg_state(regs, engine, + INTEL_GEN(engine->i915) >= 12 ? + GEN12_CTX_BB_PER_CTX_PTR : + CTX_BB_PER_CTX_PTR); } static int @@ -3258,6 +3781,7 @@ populate_lr_context(struct intel_context *ce, struct intel_engine_cs *engine, struct intel_ring *ring) { + bool inhibit = true; void *vaddr; u32 *regs; int ret; @@ -3289,14 +3813,15 @@ populate_lr_context(struct intel_context *ce, memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); + inhibit = false; } /* The second page of the context object contains some fields which must * be set up prior to the first execution. 
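A minimal before/after sketch of the register-state indexing convention carried through these hunks: the CTX_* offsets in intel_lrc_reg.h now name the value dword directly (the old offset plus one), so call sites drop their explicit "+ 1":

	/* old layout: the define pointed at the LRI register slot */
	regs[CTX_RING_TAIL + 1] = ring->tail;

	/* new layout: CTX_RING_TAIL (0x06 + 1) is the value slot itself */
	regs[CTX_RING_TAIL] = ring->tail;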
*/ regs = vaddr + LRC_STATE_PN * PAGE_SIZE; - execlists_init_reg_state(regs, ce, engine, ring); - if (!engine->default_state) - regs[CTX_CONTEXT_CONTROL + 1] |= + execlists_init_reg_state(regs, ce, engine, ring, inhibit); + if (inhibit) + regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); ret = 0; @@ -3436,8 +3961,9 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve) return; swap(ve->siblings[swp], ve->siblings[0]); - virtual_update_register_offsets(ve->context.lrc_reg_state, - ve->siblings[0]); + if (!intel_engine_has_relative_mmio(ve->siblings[0])) + virtual_update_register_offsets(ve->context.lrc_reg_state, + ve->siblings[0]); } static int virtual_context_pin(struct intel_context *ce) @@ -3594,6 +4120,8 @@ submit_engine: static void virtual_submit_request(struct i915_request *rq) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + struct i915_request *old; + unsigned long flags; GEM_TRACE("%s: rq=%llx:%lld\n", ve->base.name, @@ -3602,15 +4130,31 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - GEM_BUG_ON(ve->request); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); + spin_lock_irqsave(&ve->base.active.lock, flags); - ve->base.execlists.queue_priority_hint = rq_prio(rq); - WRITE_ONCE(ve->request, rq); + old = ve->request; + if (old) { /* background completion event from preempt-to-busy */ + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } - list_move_tail(&rq->sched.link, virtual_queue(ve)); + if (i915_request_completed(rq)) { + __i915_request_submit(rq); + + ve->base.execlists.queue_priority_hint = INT_MIN; + ve->request = NULL; + } else { + ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->request = i915_request_get(rq); + + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + + tasklet_schedule(&ve->base.execlists.tasklet); + } - tasklet_schedule(&ve->base.execlists.tasklet); + spin_unlock_irqrestore(&ve->base.active.lock, flags); } static struct ve_bond * @@ -3631,18 +4175,22 @@ static void virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) { struct virtual_engine *ve = to_virtual_engine(rq->engine); + intel_engine_mask_t allowed, exec; struct ve_bond *bond; + allowed = ~to_request(signal)->engine->mask; + bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) { - intel_engine_mask_t old, new, cmp; + if (bond) + allowed &= bond->sibling_mask; - cmp = READ_ONCE(rq->execution_mask); - do { - old = cmp; - new = cmp & bond->sibling_mask; - } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old); - } + /* Restrict the bonded request to run on only the available engines */ + exec = READ_ONCE(rq->execution_mask); + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) + ; + + /* Prevent the master from being re-run on the bonded engines */ + to_request(signal)->execution_mask &= ~allowed; } struct intel_context * @@ -3666,6 +4214,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, ve->base.i915 = ctx->i915; ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; ve->base.id = -1; ve->base.class = OTHER_CLASS; ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; @@ -3689,6 +4238,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); + 
intel_engine_init_breadcrumbs(&ve->base); intel_engine_init_execlists(&ve->base); @@ -3851,6 +4401,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, return 0; } +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + + if (sibling >= ve->num_siblings) + return NULL; + + return ve->siblings[sibling]; +} + void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3939,6 +4501,9 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, u32 head, bool scrub) { + GEM_BUG_ON(!intel_context_is_pinned(ce)); + __context_pin_acquire(ce); + /* * We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -3955,7 +4520,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, engine->context_size - PAGE_SIZE); } - execlists_init_reg_state(regs, ce, engine, ce->ring); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); } /* Rerun the request; its payload has been neutered (if guilty). */ @@ -3963,6 +4528,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); + __context_pin_release(ce); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2bba82bcc16..99dc576a4e25 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -66,6 +66,12 @@ struct intel_engine_cs; #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) +#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, @@ -104,6 +110,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine); */ #define LRC_HEADER_PAGES LRC_PPHWSP_PN +/* Space within PPHWSP reserved to be used as scratch */ +#define LRC_PPHWSP_SCRATCH 0x34 +#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) + void intel_execlists_set_default_submission(struct intel_engine_cs *engine); void intel_lr_context_reset(struct intel_engine_cs *engine, @@ -131,4 +141,8 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, const struct intel_engine_cs *master, const struct intel_engine_cs *sibling); +struct intel_engine_cs * +intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, + unsigned int sibling); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index b8f20ad71169..06ab0276e10e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,55 +9,41 @@ #include <linux/types.h> -/* GEN8+ Reg State Context */ -#define CTX_LRI_HEADER_0 0x01 -#define CTX_CONTEXT_CONTROL 0x02 -#define CTX_RING_HEAD 0x04 -#define CTX_RING_TAIL 0x06 -#define CTX_RING_BUFFER_START 0x08 -#define CTX_RING_BUFFER_CONTROL 0x0a -#define CTX_BB_HEAD_U 0x0c -#define CTX_BB_HEAD_L 0x0e -#define CTX_BB_STATE 0x10 -#define 
CTX_SECOND_BB_HEAD_U 0x12 -#define CTX_SECOND_BB_HEAD_L 0x14 -#define CTX_SECOND_BB_STATE 0x16 -#define CTX_BB_PER_CTX_PTR 0x18 -#define CTX_RCS_INDIRECT_CTX 0x1a -#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c -#define CTX_LRI_HEADER_1 0x21 -#define CTX_CTX_TIMESTAMP 0x22 -#define CTX_PDP3_UDW 0x24 -#define CTX_PDP3_LDW 0x26 -#define CTX_PDP2_UDW 0x28 -#define CTX_PDP2_LDW 0x2a -#define CTX_PDP1_UDW 0x2c -#define CTX_PDP1_LDW 0x2e -#define CTX_PDP0_UDW 0x30 -#define CTX_PDP0_LDW 0x32 -#define CTX_LRI_HEADER_2 0x41 -#define CTX_R_PWR_CLK_STATE 0x42 -#define CTX_END 0x44 - -#define CTX_REG(reg_state, pos, reg, val) do { \ - u32 *reg_state__ = (reg_state); \ - const u32 pos__ = (pos); \ - (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \ - (reg_state__)[(pos__) + 1] = (val); \ -} while (0) +/* GEN8 to GEN11 Reg State Context */ +#define CTX_CONTEXT_CONTROL (0x02 + 1) +#define CTX_RING_HEAD (0x04 + 1) +#define CTX_RING_TAIL (0x06 + 1) +#define CTX_RING_BUFFER_START (0x08 + 1) +#define CTX_RING_BUFFER_CONTROL (0x0a + 1) +#define CTX_BB_STATE (0x10 + 1) +#define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_PDP3_UDW (0x24 + 1) +#define CTX_PDP3_LDW (0x26 + 1) +#define CTX_PDP2_UDW (0x28 + 1) +#define CTX_PDP2_LDW (0x2a + 1) +#define CTX_PDP1_UDW (0x2c + 1) +#define CTX_PDP1_LDW (0x2e + 1) +#define CTX_PDP0_UDW (0x30 + 1) +#define CTX_PDP0_LDW (0x32 + 1) +#define CTX_R_PWR_CLK_STATE (0x42 + 1) + +#define GEN9_CTX_RING_MI_MODE 0x54 + +/* GEN12+ Reg State Context */ +#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1) #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \ - (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \ } while (0) #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ const u64 addr__ = px_dma(ppgtt->pd); \ - (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ - (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \ + (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \ } while (0) #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 728704bbbe18..5bac3966906b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -287,10 +287,9 @@ static const struct drm_i915_mocs_entry icelake_mocs_table[] = { GEN11_MOCS_ENTRIES }; -static bool get_mocs_settings(struct intel_gt *gt, +static bool get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { - struct drm_i915_private *i915 = gt->i915; bool result = false; if (INTEL_GEN(i915) >= 12) { @@ -331,9 +330,9 @@ static bool get_mocs_settings(struct intel_gt *gt, return result; } -static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) +static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) { - switch (engine_id) { + switch (engine->id) { case RCS0: return GEN9_GFX_MOCS(index); case VCS0: @@ -347,7 +346,7 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) case VCS2: return GEN11_MFX2_MOCS(index); default: - MISSING_CASE(engine_id); + MISSING_CASE(engine->id); return INVALID_MMIO_REG; } } @@ -365,118 +364,25 @@ 
static u32 get_entry_control(const struct drm_i915_mocs_table *table, return table->table[I915_MOCS_PTE].control_value; } -/** - * intel_mocs_init_engine() - emit the mocs control table - * @engine: The engine for whom to emit the registers. - * - * This function simply emits a MI_LOAD_REGISTER_IMM command for the - * given table starting at the given address. - */ -void intel_mocs_init_engine(struct intel_engine_cs *engine) +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - struct intel_gt *gt = engine->gt; - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; - u32 unused_value; - - /* Platforms with global MOCS do not need per-engine initialization. */ - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) - return; - - /* Called under a blanket forcewake */ - assert_forcewakes_active(uncore, FORCEWAKE_ALL); - - if (!get_mocs_settings(gt, &table)) - return; - - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].control_value; - - for (index = 0; index < table.size; index++) { - u32 value = get_entry_control(&table, index); + struct intel_uncore *uncore = engine->uncore; + u32 unused_value = table->table[I915_MOCS_PTE].control_value; + unsigned int i; + for (i = 0; i < table->size; i++) intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), - value); - } + mocs_register(engine, i), + get_entry_control(table, i)); - /* All remaining entries are also unused */ - for (; index < table.n_entries; index++) + /* All remaining entries are unused */ + for (; i < table->n_entries; i++) intel_uncore_write_fw(uncore, - mocs_register(engine->id, index), + mocs_register(engine, i), unused_value); } -static void intel_mocs_init_global(struct intel_gt *gt) -{ - struct intel_uncore *uncore = gt->uncore; - struct drm_i915_mocs_table table; - unsigned int index; - - GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - - if (!get_mocs_settings(gt, &table)) - return; - - if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) - return; - - for (index = 0; index < table.size; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[index].control_value); - - /* - * Ok, now set the unused entries to the invalid entry (index 0). These - * entries are officially undefined and no contract for the contents and - * settings is given for these entries. 
- */ - for (; index < table.n_entries; index++) - intel_uncore_write(uncore, - GEN12_GLOBAL_MOCS(index), - table.table[0].control_value); -} - -static int emit_mocs_control_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) -{ - enum intel_engine_id engine = rq->engine->id; - unsigned int index; - u32 unused_value; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].control_value; - - cs = intel_ring_begin(rq, 2 + 2 * table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries); - - for (index = 0; index < table->size; index++) { - u32 value = get_entry_control(table, index); - - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = value; - } - - /* All remaining entries are also unused */ - for (; index < table->n_entries; index++) { - *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); - *cs++ = unused_value; - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - /* * Get l3cc_value from MOCS entry taking into account when it's not used: * I915_MOCS_PTE's value is returned in this case. @@ -494,141 +400,93 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, u16 low, u16 high) { - return low | high << 16; + return low | (u32)high << 16; } -static int emit_mocs_l3cc_table(struct i915_request *rq, - const struct drm_i915_mocs_table *table) +static void init_l3cc_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) { - u16 unused_value; + struct intel_uncore *uncore = engine->uncore; + u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value; unsigned int i; - u32 *cs; - - if (GEM_WARN_ON(table->size > table->n_entries)) - return -ENODEV; - - /* Set unused values to PTE */ - unused_value = table->table[I915_MOCS_PTE].l3cc_value; - - cs = intel_ring_begin(rq, 2 + table->n_entries); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2); for (i = 0; i < table->size / 2; i++) { u16 low = get_entry_l3cc(table, 2 * i); u16 high = get_entry_l3cc(table, 2 * i + 1); - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, high); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, high)); } /* Odd table size - 1 left over */ - if (table->size & 0x01) { + if (table->size & 1) { u16 low = get_entry_l3cc(table, 2 * i); - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, low, unused_value); + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, low, unused_value)); i++; } /* All remaining entries are also unused */ - for (; i < table->n_entries / 2; i++) { - *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); - *cs++ = l3cc_combine(table, unused_value, unused_value); - } - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; + for (; i < table->n_entries / 2; i++) + intel_uncore_write(uncore, + GEN9_LNCFCMOCS(i), + l3cc_combine(table, unused_value, + unused_value)); } -static void intel_mocs_init_l3cc_table(struct intel_gt *gt) +void intel_mocs_init_engine(struct intel_engine_cs *engine) { - struct intel_uncore *uncore = gt->uncore; struct drm_i915_mocs_table table; - unsigned int i; - u16 unused_value; - if (!get_mocs_settings(gt, &table)) + /* Called under a blanket forcewake */ + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + + if (!get_mocs_settings(engine->i915, &table)) 
return; - /* Set unused values to PTE */ - unused_value = table.table[I915_MOCS_PTE].l3cc_value; + /* Platforms with global MOCS do not need per-engine initialization. */ + if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915)) + init_mocs_table(engine, &table); - for (i = 0; i < table.size / 2; i++) { - u16 low = get_entry_l3cc(&table, 2 * i); - u16 high = get_entry_l3cc(&table, 2 * i + 1); + if (engine->class == RENDER_CLASS) + init_l3cc_table(engine, &table); +} - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, high)); - } +static void intel_mocs_init_global(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + struct drm_i915_mocs_table table; + unsigned int index; - /* Odd table size - 1 left over */ - if (table.size & 0x01) { - u16 low = get_entry_l3cc(&table, 2 * i); + GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, low, unused_value)); - i++; - } + if (!get_mocs_settings(gt->i915, &table)) + return; - /* All remaining entries are also unused */ - for (; i < table.n_entries / 2; i++) - intel_uncore_write(uncore, - GEN9_LNCFCMOCS(i), - l3cc_combine(&table, unused_value, - unused_value)); -} + if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) + return; -/** - * intel_mocs_emit() - program the MOCS register. - * @rq: Request to use to set up the MOCS tables. - * - * This function will emit a batch buffer with the values required for - * programming the MOCS register values for all the currently supported - * rings. - * - * These registers are partially stored in the RCS context, so they are - * emitted at the same time so that when a context is created these registers - * are set up. These registers have to be emitted into the start of the - * context as setting the ELSP will re-init some of these registers back - * to the hw values. - * - * Return: 0 on success, otherwise the error status. - */ -int intel_mocs_emit(struct i915_request *rq) -{ - struct drm_i915_mocs_table t; - int ret; - - if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915) || - rq->engine->class != RENDER_CLASS) - return 0; - - if (get_mocs_settings(rq->engine->gt, &t)) { - /* Program the RCS control registers */ - ret = emit_mocs_control_table(rq, &t); - if (ret) - return ret; - - /* Now program the l3cc registers */ - ret = emit_mocs_l3cc_table(rq, &t); - if (ret) - return ret; - } + for (index = 0; index < table.size; index++) + intel_uncore_write(uncore, + GEN12_GLOBAL_MOCS(index), + table.table[index].control_value); - return 0; + /* + * Ok, now set the unused entries to the invalid entry (index 0). These + * entries are officially undefined and no contract for the contents and + * settings is given for these entries. + */ + for (; index < table.n_entries; index++) + intel_uncore_write(uncore, + GEN12_GLOBAL_MOCS(index), + table.table[0].control_value); } void intel_mocs_init(struct intel_gt *gt) { - intel_mocs_init_l3cc_table(gt); - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) intel_mocs_init_global(gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 2ae816b7ca19..83371f3e6ba1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,13 +49,10 @@ * context handling keep the MOCS in step. 
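A short sketch of the L3CC packing that l3cc_combine() and init_l3cc_table() above implement: each GEN9_LNCFCMOCS register carries two 16-bit entries, entry 2*i in bits 15:0 and entry 2*i+1 in bits 31:16, with the added (u32) cast keeping the shift in unsigned arithmetic:

	u16 low = get_entry_l3cc(table, 2 * i);
	u16 high = get_entry_l3cc(table, 2 * i + 1);

	/* entry 2*i in the low half, entry 2*i + 1 in the high half */
	intel_uncore_write(uncore, GEN9_LNCFCMOCS(i), low | (u32)high << 16);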
*/ -struct i915_request; struct intel_engine_cs; struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); -int intel_mocs_emit(struct i915_request *rq); - #endif diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c new file mode 100644 index 000000000000..70f0e01a38b9 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -0,0 +1,712 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/pm_runtime.h> + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_rc6.h" +#include "intel_sideband.h" + +/** + * DOC: RC6 + * + * RC6 is a special power stage which allows the GPU to enter an very + * low-voltage mode when idle, using down to 0V while at this stage. This + * stage is entered automatically when the GPU is idle when RC6 support is + * enabled, and as soon as new workload arises GPU wakes up automatically as + * well. + * + * There are different RC6 modes available in Intel GPU, which differentiate + * among each other with the latency required to enter and leave RC6 and + * voltage consumed by the GPU in different states. + * + * The combination of the following flags define which states GPU is allowed + * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and + * RC6pp is deepest RC6. Their support by hardware varies according to the + * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one + * which brings the most power savings; deeper states save more power, but + * require higher latency to switch to and wake up. + */ + +static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6) +{ + return container_of(rc6, struct intel_gt, rc6); +} + +static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->uncore; +} + +static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc) +{ + return rc6_to_gt(rc)->i915; +} + +static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val) +{ + intel_uncore_write_fw(uncore, reg, val); +} + +static void gen11_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. 
+ * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). + */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + GEN6_RC_CTL_EI_MODE(1)); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE); +} + +static void gen9_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6_mode; + + /* 2b: Program RC6 thresholds.*/ + if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); + set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150); + } else if (IS_SKYLAKE(rc6_to_i915(rc6))) { + /* + * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only + * when CPG is enabled + */ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); + } else { + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); + } + + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GUC_MAX_IDLE_COUNT, 0xA); + + set(uncore, GEN6_RC_SLEEP, 0); + + /* + * 2c: Program Coarse Power Gating Policies. + * + * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we + * use instead is a more conservative estimate for the maximum time + * it takes us to service a CS interrupt and submit a new ELSP - that + * is the time which the GPU is idle waiting for the CPU to select the + * next request to execute. If the idle hysteresis is less than that + * interrupt service latency, the hardware will automatically gate + * the power well and we will then incur the wake up cost on top of + * the service latency. A similar guide from plane_state is that we + * do not want the enable hysteresis to less than the wakeup latency. + * + * igt/gem_exec_nop/sequential provides a rough estimate for the + * service latency, and puts it around 10us for Broadwell (and other + * big core) and around 40us for Broxton (and other low power cores). + * [Note that for legacy ringbuffer submission, this is less than 1us!] + * However, the wakeup latency on Broxton is closer to 100us. To be + * conservative, we have to factor in a context switch on top (due + * to ksoftirqd). 
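Putting rough numbers on the trade-off described above: the Bspec figure of 25 * 1280 ns is only about 32 us of idle hysteresis, on the same order as the ~10-40 us CS service latency quoted, so the power well could gate just before the next ELSP write; the 250 * 1280 ns programmed below is roughly 320 us, comfortably above the ~100 us Broxton wakeup latency plus a context-switch margin.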
+ */ + set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); + set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); + + /* 3a: Enable RC6 */ + set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + + /* WaRsUseTimeoutMode:cnl (pre-prod) */ + if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) + rc6_mode = GEN7_RC_CTL_TO_MODE; + else + rc6_mode = GEN6_RC_CTL_EI_MODE(1); + + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_RC6_ENABLE | + rc6_mode); + + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); +} + +static void gen8_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2b: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + GEN6_RC_CTL_RC6_ENABLE); +} + +static void gen6_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 rc6vids, rc6_mask; + int ret; + + set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC_SLEEP, 0); + set(uncore, GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(i915)) + set(uncore, GEN6_RC6_THRESHOLD, 125000); + else + set(uncore, GEN6_RC6_THRESHOLD, 50000); + set(uncore, GEN6_RC6p_THRESHOLD, 150000); + set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + + /* We don't use those on Haswell */ + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + if (HAS_RC6p(i915)) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + if (HAS_RC6pp(i915)) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + set(uncore, GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); + + rc6vids = 0; + ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS, + &rc6vids, NULL); + if (IS_GEN(i915, 6) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN(i915, 6) && + (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } +} + +/* Check that the pcbr address is not empty. 
*/ +static int chv_rc6_init(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); + + pctx_paddr = (paddr & ~4095); + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + } + + return 0; +} + +static int vlv_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_gem_object *pctx; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24 * SZ_1K; + u32 pcbr; + + pcbr = intel_uncore_read(uncore, VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + resource_size_t pcbr_offset; + + pcbr_offset = (pcbr & ~4095) - i915->dsm.start; + pctx = i915_gem_object_create_stolen_for_preallocated(i915, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + if (IS_ERR(pctx)) + return PTR_ERR(pctx); + + goto out; + } + + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(i915, pctx_size); + if (IS_ERR(pctx)) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return PTR_ERR(pctx); + } + + GEM_BUG_ON(range_overflows_t(u64, + i915->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = i915->dsm.start + pctx->stolen->start; + intel_uncore_write(uncore, VLV_PCBR, pctx_paddr); + +out: + rc6->pctx = pctx; + return 0; +} + +static void chv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* 2a: Program RC6 thresholds.*/ + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + set(uncore, GEN6_RC_SLEEP, 0); + + /* TO threshold set to 500 us (0x186 * 1.28 us) */ + set(uncore, GEN6_RC6_THRESHOLD, 0x186); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + /* 3: Enable RC6 */ + set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE); +} + +static void vlv_rc6_enable(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); + set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); + + for_each_engine(engine, rc6_to_gt(rc6), id) + set(uncore, RING_MAX_IDLE(engine->mmio_base), 10); + + set(uncore, GEN6_RC6_THRESHOLD, 0x557); + + /* Allows RC6 residency counter to work */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | 
+ VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); + + set(uncore, GEN6_RC_CONTROL, + GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); +} + +static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6) +{ + struct intel_uncore *uncore = rc6_to_uncore(rc6); + struct drm_i915_private *i915 = rc6_to_i915(rc6); + u32 rc6_ctx_base, rc_ctl, rc_sw_target; + bool enable_rc6 = true; + + rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL); + rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE); + rc_sw_target &= RC_SW_TARGET_STATE_MASK; + rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT; + DRM_DEBUG_DRIVER("BIOS enabled RC states: " + "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", + onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), + onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), + rc_sw_target); + + if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) { + DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); + enable_rc6 = false; + } + + /* + * The exact context size is not known for BXT, so assume a page size + * for this check. + */ + rc6_ctx_base = + intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK; + if (!(rc6_ctx_base >= i915->dsm_reserved.start && + rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) { + DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); + enable_rc6 = false; + } + + if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 && + (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) { + DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) || + !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) { + DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) { + DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); + enable_rc6 = false; + } + + if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) { + DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); + enable_rc6 = false; + } + + return enable_rc6; +} + +static bool rc6_supported(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!HAS_RC6(i915)) + return false; + + if (intel_vgpu_active(i915)) + return false; + + if (is_mock_gt(rc6_to_gt(rc6))) + return false; + + if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) { + dev_notice(i915->drm.dev, + "RC6 and powersaving disabled by BIOS\n"); + return false; + } + + return true; +} + +static void rpm_get(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(rc6->wakeref); + pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = true; +} + +static void rpm_put(struct intel_rc6 *rc6) +{ + GEM_BUG_ON(!rc6->wakeref); + pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev); + rc6->wakeref = false; +} + +static void __intel_rc6_disable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + if (INTEL_GEN(i915) >= 9) + set(uncore, GEN9_PG_ENABLE, 0); + set(uncore, GEN6_RC_CONTROL, 0); + set(uncore, GEN6_RC_STATE, 0); + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); +} + +void intel_rc6_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + int err; + + /* Disable runtime-pm 
until we can save the GPU state with rc6 pctx */ + rpm_get(rc6); + + if (!rc6_supported(rc6)) + return; + + if (IS_CHERRYVIEW(i915)) + err = chv_rc6_init(rc6); + else if (IS_VALLEYVIEW(i915)) + err = vlv_rc6_init(rc6); + else + err = 0; + + /* Sanitize rc6, ensure it is disabled before we are ready. */ + __intel_rc6_disable(rc6); + + rc6->supported = err == 0; +} + +void intel_rc6_sanitize(struct intel_rc6 *rc6) +{ + if (rc6->supported) + __intel_rc6_disable(rc6); +} + +void intel_rc6_enable(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + + if (!rc6->supported) + return; + + GEM_BUG_ON(rc6->enabled); + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + if (IS_CHERRYVIEW(i915)) + chv_rc6_enable(rc6); + else if (IS_VALLEYVIEW(i915)) + vlv_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 11) + gen11_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 9) + gen9_rc6_enable(rc6); + else if (IS_BROADWELL(i915)) + gen8_rc6_enable(rc6); + else if (INTEL_GEN(i915) >= 6) + gen6_rc6_enable(rc6); + + intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); + + /* rc6 is ready, runtime-pm is go! */ + rpm_put(rc6); + rc6->enabled = true; +} + +void intel_rc6_disable(struct intel_rc6 *rc6) +{ + if (!rc6->enabled) + return; + + rpm_get(rc6); + rc6->enabled = false; + + __intel_rc6_disable(rc6); +} + +void intel_rc6_fini(struct intel_rc6 *rc6) +{ + struct drm_i915_gem_object *pctx; + + intel_rc6_disable(rc6); + + pctx = fetch_and_zero(&rc6->pctx); + if (pctx) + i915_gem_object_put(pctx); + + if (rc6->wakeref) + rpm_put(rc6); +} + +static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg) +{ + u32 lower, upper, tmp; + int loop = 2; + + /* + * The register accessed do not need forcewake. We borrow + * uncore lock to prevent concurrent access to range reg. + */ + lockdep_assert_held(&uncore->lock); + + /* + * vlv and chv residency counters are 40 bits in width. + * With a control bit, we can choose between upper or lower + * 32bit window into this counter. + * + * Although we always use the counter in high-range mode elsewhere, + * userspace may attempt to read the value before rc6 is initialised, + * before we have set the default VLV_COUNTER_CONTROL value. So always + * set the high bit to be safe. + */ + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + do { + tmp = upper; + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); + lower = intel_uncore_read_fw(uncore, reg); + + set(uncore, VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); + upper = intel_uncore_read_fw(uncore, reg); + } while (upper != tmp && --loop); + + /* + * Everywhere else we always use VLV_COUNTER_CONTROL with the + * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set + * now. + */ + + return lower | (u64)upper << 8; +} + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + struct intel_uncore *uncore = rc6_to_uncore(rc6); + u64 time_hw, prev_hw, overflow_hw; + unsigned int fw_domains; + unsigned long flags; + unsigned int i; + u32 mul, div; + + if (!rc6->supported) + return 0; + + /* + * Store previous hw counter values for counter wrap-around handling. 
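As an aside, a minimal user-space sketch of the wrap-around accumulation this function performs: each raw hardware sample is turned into a delta against the previous sample, adding back the counter width (BIT_ULL(40) on VLV/CHV, BIT_ULL(32) elsewhere) when the counter has wrapped, and the delta is folded into a 64-bit running total. The helper and variable names below are illustrative only, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

static uint64_t accumulate_residency(uint64_t *prev, uint64_t *total,
                                     uint64_t sample, uint64_t overflow)
{
        /* delta since the last sample, allowing for one counter wrap */
        uint64_t delta = sample >= *prev ? sample - *prev
                                         : sample + overflow - *prev;

        *prev = sample;   /* remember the raw value for the next query */
        *total += delta;  /* extend into a 64-bit software counter */
        return *total;
}

int main(void)
{
        uint64_t prev = 0, total = 0;
        const uint64_t overflow = 1ull << 40; /* VLV/CHV counters are 40 bits */

        accumulate_residency(&prev, &total, 100, overflow);
        accumulate_residency(&prev, &total, (1ull << 40) - 50, overflow);
        accumulate_residency(&prev, &total, 25, overflow); /* counter wrapped */
        printf("total raw ticks: %llu\n", (unsigned long long)total);
        return 0;
}

This only models the raw accumulation; the driver then scales the total by mul/div (CZ clock units on VLV/CHV, 833.33ns units on Gen9LP, 1.28us elsewhere) via mul_u64_u32_div().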
+ * + * There are only four interesting registers and they live next to each + * other so we can use the relative address, compared to the smallest + * one as the index into driver storage. + */ + i = (i915_mmio_reg_offset(reg) - + i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); + if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency))) + return 0; + + fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); + + spin_lock_irqsave(&uncore->lock, flags); + intel_uncore_forcewake_get__locked(uncore, fw_domains); + + /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) { + mul = 1000000; + div = i915->czclk_freq; + overflow_hw = BIT_ULL(40); + time_hw = vlv_residency_raw(uncore, reg); + } else { + /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ + if (IS_GEN9_LP(i915)) { + mul = 10000; + div = 12; + } else { + mul = 1280; + div = 1; + } + + overflow_hw = BIT_ULL(32); + time_hw = intel_uncore_read_fw(uncore, reg); + } + + /* + * Counter wrap handling. + * + * But relying on a sufficient frequency of queries otherwise counters + * can still wrap. + */ + prev_hw = rc6->prev_hw_residency[i]; + rc6->prev_hw_residency[i] = time_hw; + + /* RC6 delta from last sample. */ + if (time_hw >= prev_hw) + time_hw -= prev_hw; + else + time_hw += overflow_hw - prev_hw; + + /* Add delta to RC6 extended raw driver copy. */ + time_hw += rc6->cur_residency[i]; + rc6->cur_residency[i] = time_hw; + + intel_uncore_forcewake_put__locked(uncore, fw_domains); + spin_unlock_irqrestore(&uncore->lock, flags); + + return mul_u64_u32_div(time_hw, mul, div); +} + +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg) +{ + return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000); +} diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h new file mode 100644 index 000000000000..5e6711f36457 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_H +#define INTEL_RC6_H + +#include "i915_reg.h" + +struct intel_engine_cs; +struct intel_rc6; + +void intel_rc6_init(struct intel_rc6 *rc6); +void intel_rc6_fini(struct intel_rc6 *rc6); + +void intel_rc6_sanitize(struct intel_rc6 *rc6); +void intel_rc6_enable(struct intel_rc6 *rc6); +void intel_rc6_disable(struct intel_rc6 *rc6); + +u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); +u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); + +#endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h new file mode 100644 index 000000000000..214f354d6ae4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_RC6_TYPES_H +#define INTEL_RC6_TYPES_H + +#include <linux/spinlock.h> +#include <linux/types.h> + +#include "intel_engine_types.h" + +struct drm_i915_gem_object; + +struct intel_rc6 { + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + + struct drm_i915_gem_object *pctx; + + bool supported : 1; + bool enabled : 1; + bool wakeref : 1; +}; + +#endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b9d84d52e986..bf8d1ed4b1d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -42,11 +42,10 @@ 
static void engine_skip_context(struct i915_request *rq) struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *hung_ctx = rq->gem_context; - lockdep_assert_held(&engine->active.lock); - if (!i915_request_is_active(rq)) return; + lockdep_assert_held(&engine->active.lock); list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->gem_context == hung_ctx) i915_request_skip(rq, -EIO); @@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) rq->fence.seqno, yesno(guilty)); - lockdep_assert_held(&rq->engine->active.lock); GEM_BUG_ON(i915_request_completed(rq)); if (guilty) { @@ -284,14 +282,14 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - struct intel_engine_cs *engine; - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN6_GRDOM_RENDER, [BCS0] = GEN6_GRDOM_BLT, [VCS0] = GEN6_GRDOM_MEDIA, [VCS1] = GEN8_GRDOM_MEDIA2, [VECS0] = GEN6_GRDOM_VECS, }; + struct intel_engine_cs *engine; u32 hw_mask; if (engine_mask == ALL_ENGINES) { @@ -300,7 +298,7 @@ static int gen6_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; } @@ -309,7 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } -static u32 gen11_lock_sfc(struct intel_engine_cs *engine) +static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) { struct intel_uncore *uncore = engine->uncore; u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access; @@ -318,6 +316,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) i915_reg_t sfc_usage; u32 sfc_usage_bit; u32 sfc_reset_bit; + int ret; switch (engine->class) { case VIDEO_DECODE_CLASS: @@ -352,27 +351,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine) } /* - * Tell the engine that a software reset is going to happen. The engine - * will then try to force lock the SFC (if currently locked, it will - * remain so until we tell the engine it is safe to unlock; if currently - * unlocked, it will ignore this and all new lock requests). If SFC - * ends up being locked to the engine we want to reset, we have to reset - * it as well (we will unlock it once the reset sequence is completed). + * If the engine is using a SFC, tell the engine that a software reset + * is going to happen. The engine will then try to force lock the SFC. + * If SFC ends up being locked to the engine we want to reset, we have + * to reset it as well (we will unlock it once the reset sequence is + * completed). */ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) + return 0; + rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit); - if (__intel_wait_for_register_fw(uncore, - sfc_forced_lock_ack, - sfc_forced_lock_ack_bit, - sfc_forced_lock_ack_bit, - 1000, 0, NULL)) { - DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + ret = __intel_wait_for_register_fw(uncore, + sfc_forced_lock_ack, + sfc_forced_lock_ack_bit, + sfc_forced_lock_ack_bit, + 1000, 0, NULL); + + /* Was the SFC released while we were trying to lock it? 
*/ + if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)) return 0; - } - if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit) - return sfc_reset_bit; + if (ret) { + DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n"); + return ret; + } + *hw_mask |= sfc_reset_bit; return 0; } @@ -408,7 +413,7 @@ static int gen11_reset_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask, unsigned int retry) { - const u32 hw_engine_mask[] = { + static const u32 hw_engine_mask[] = { [RCS0] = GEN11_GRDOM_RENDER, [BCS0] = GEN11_GRDOM_BLT, [VCS0] = GEN11_GRDOM_MEDIA, @@ -427,17 +432,26 @@ static int gen11_reset_engines(struct intel_gt *gt, hw_mask = GEN11_GRDOM_FULL; } else { hw_mask = 0; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask)); hw_mask |= hw_engine_mask[engine->id]; - hw_mask |= gen11_lock_sfc(engine); + ret = gen11_lock_sfc(engine, &hw_mask); + if (ret) + goto sfc_unlock; } } ret = gen6_hw_domain_reset(gt, hw_mask); +sfc_unlock: + /* + * We unlock the SFC based on the lock status and not the result of + * gen11_lock_sfc to make sure that we clean properly if something + * wrong happened during the lock (e.g. lock acquired after timeout + * expiration). + */ if (engine_mask != ALL_ENGINES) - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen11_unlock_sfc(engine); return ret; @@ -496,7 +510,7 @@ static int gen8_reset_engines(struct intel_gt *gt, intel_engine_mask_t tmp; int ret; - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) goto skip_reset; @@ -522,19 +536,30 @@ static int gen8_reset_engines(struct intel_gt *gt, ret = gen6_reset_engines(gt, engine_mask, retry); skip_reset: - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) + for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); return ret; } +static int mock_reset(struct intel_gt *gt, + intel_engine_mask_t mask, + unsigned int retry) +{ + return 0; +} + typedef int (*reset_func)(struct intel_gt *, intel_engine_mask_t engine_mask, unsigned int retry); -static reset_func intel_get_gpu_reset(struct drm_i915_private *i915) +static reset_func intel_get_gpu_reset(const struct intel_gt *gt) { - if (INTEL_GEN(i915) >= 8) + struct drm_i915_private *i915 = gt->i915; + + if (is_mock_gt(gt)) + return mock_reset; + else if (INTEL_GEN(i915) >= 8) return gen8_reset_engines; else if (INTEL_GEN(i915) >= 6) return gen6_reset_engines; @@ -557,7 +582,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) int ret = -ETIMEDOUT; int retry; - reset = intel_get_gpu_reset(gt->i915); + reset = intel_get_gpu_reset(gt); if (!reset) return -ENODEV; @@ -577,17 +602,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) return ret; } -bool intel_has_gpu_reset(struct drm_i915_private *i915) +bool intel_has_gpu_reset(const struct intel_gt *gt) { if (!i915_modparams.reset) return NULL; - return intel_get_gpu_reset(i915); + return intel_get_gpu_reset(gt); } -bool intel_has_reset_engine(struct drm_i915_private *i915) +bool intel_has_reset_engine(const struct intel_gt *gt) { - return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2; + if (i915_modparams.reset < 2) + return false; + + return 
INTEL_INFO(gt->i915)->has_reset_engine; } int intel_reset_guc(struct intel_gt *gt) @@ -654,7 +682,7 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt) intel_engine_mask_t awake = 0; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (intel_engine_pm_get_if_awake(engine)) awake |= engine->mask; reset_prepare_engine(engine); @@ -684,10 +712,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); - i915_gem_restore_fences(gt->i915); + i915_gem_restore_fences(gt->ggtt); return err; } @@ -697,7 +725,7 @@ static void reset_finish_engine(struct intel_engine_cs *engine) engine->reset.finish(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); - intel_engine_signal_breadcrumbs(engine); + intel_engine_breadcrumbs_irq(engine); } static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) @@ -705,7 +733,7 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { reset_finish_engine(engine); if (awake & engine->mask) intel_engine_pm_put(engine); @@ -741,7 +769,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { struct drm_printer p = drm_debug_printer(__func__); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -758,7 +786,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) __intel_gt_reset(gt, ALL_ENGINES); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->submit_request = nop_submit_request; /* @@ -770,7 +798,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) engine->cancel_requests(engine); reset_finish(gt, awake); @@ -783,7 +811,7 @@ void intel_gt_set_wedged(struct intel_gt *gt) intel_wakeref_t wakeref; mutex_lock(>->reset.mutex); - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) __intel_gt_set_wedged(gt); mutex_unlock(>->reset.mutex); } @@ -793,11 +821,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl; unsigned long flags; + bool ok; if (!test_bit(I915_WEDGED, >->reset.flags)) return true; - if (!gt->scratch) /* Never full initialised, recovery impossible */ + /* Never fully initialised, recovery impossible */ + if (test_bit(I915_WEDGED_ON_INIT, >->reset.flags)) return false; GEM_TRACE("start\n"); @@ -814,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); @@ -829,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * (I915_FENCE_TIMEOUT) so this wait 
should not be unbounded * in the worst case. */ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(fence); /* Restart iteration after droping lock */ spin_lock_irqsave(&timelines->lock, flags); @@ -838,7 +868,18 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) } spin_unlock_irqrestore(&timelines->lock, flags); - intel_gt_sanitize(gt, false); + /* We must reset pending GPU events before restoring our submission */ + ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */ + if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display) + ok = __intel_gt_reset(gt, ALL_ENGINES) == 0; + if (!ok) { + /* + * Warn CI about the unrecoverable wedged condition. + * Time for a reboot. + */ + add_taint_for_CI(TAINT_WARN); + return false; + } /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -893,7 +934,7 @@ static int resume(struct intel_gt *gt) enum intel_engine_id id; int ret; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { ret = engine->resume(engine); if (ret) return ret; @@ -943,7 +984,7 @@ void intel_gt_reset(struct intel_gt *gt, awake = reset_prepare(gt); - if (!intel_has_gpu_reset(gt->i915)) { + if (!intel_has_gpu_reset(gt)) { if (i915_modparams.reset) dev_err(gt->i915->drm.dev, "GPU reset not supported\n"); else @@ -972,7 +1013,7 @@ void intel_gt_reset(struct intel_gt *gt, * was running at the time of the reset (i.e. we weren't VT * switched away). */ - ret = i915_gem_init_hw(gt->i915); + ret = intel_gt_init_hw(gt); if (ret) { DRM_ERROR("Failed to initialise HW following reset (%d)\n", ret); @@ -1151,7 +1192,7 @@ void intel_gt_handle_error(struct intel_gt *gt, * isn't the case at least when we get here by doing a * simulated reset via debugfs, so get an RPM reference. */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); engine_mask &= INTEL_INFO(gt->i915)->engine_mask; @@ -1164,8 +1205,8 @@ void intel_gt_handle_error(struct intel_gt *gt, * Try engine reset when available. We fall back to full reset if * single reset fails. */ - if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) { - for_each_engine_masked(engine, gt->i915, engine_mask, tmp) { + if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) @@ -1193,7 +1234,7 @@ void intel_gt_handle_error(struct intel_gt *gt, synchronize_rcu_expedited(); /* Prevent any other reset-engine attempt. 
*/ - for_each_engine(engine, gt->i915, tmp) { + for_each_engine(engine, gt, tmp) { while (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) wait_on_bit(>->reset.flags, @@ -1203,7 +1244,7 @@ void intel_gt_handle_error(struct intel_gt *gt, intel_gt_reset_global(gt, engine_mask, msg); - for_each_engine(engine, gt->i915, tmp) + for_each_engine(engine, gt, tmp) clear_bit_unlock(I915_RESET_ENGINE + engine->id, >->reset.flags); clear_bit_unlock(I915_RESET_BACKOFF, >->reset.flags); @@ -1211,13 +1252,11 @@ void intel_gt_handle_error(struct intel_gt *gt, wake_up_all(>->reset.queue); out: - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); } -int intel_gt_reset_trylock(struct intel_gt *gt) +int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu) { - int srcu; - might_lock(>->reset.backoff_srcu); might_sleep(); @@ -1232,10 +1271,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt) rcu_read_lock(); } - srcu = srcu_read_lock(>->reset.backoff_srcu); + *srcu = srcu_read_lock(>->reset.backoff_srcu); rcu_read_unlock(); - return srcu; + return 0; } void intel_gt_reset_unlock(struct intel_gt *gt, int tag) @@ -1255,10 +1294,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!test_bit(I915_RESET_BACKOFF, >->reset.flags)) return -EIO; - /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(>->i915->drm.struct_mutex)) - return -EAGAIN; - if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags))) @@ -1267,6 +1302,14 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) return intel_gt_is_wedged(gt) ? -EIO : 0; } +void intel_gt_set_wedged_on_init(struct intel_gt *gt) +{ + BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES > + I915_WEDGED_ON_INIT); + intel_gt_set_wedged(gt); + set_bit(I915_WEDGED_ON_INIT, >->reset.flags); +} + void intel_gt_init_reset(struct intel_gt *gt) { init_waitqueue_head(>->reset.queue); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 37a987b17108..8e8d5f761166 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -14,7 +14,6 @@ #include "intel_engine_types.h" #include "intel_reset_types.h" -struct drm_i915_private; struct i915_request; struct intel_engine_cs; struct intel_gt; @@ -38,13 +37,19 @@ int intel_engine_reset(struct intel_engine_cs *engine, void __i915_request_reset(struct i915_request *rq, bool guilty); -int __must_check intel_gt_reset_trylock(struct intel_gt *gt); +int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu); void intel_gt_reset_unlock(struct intel_gt *gt, int tag); void intel_gt_set_wedged(struct intel_gt *gt); bool intel_gt_unset_wedged(struct intel_gt *gt); int intel_gt_terminally_wedged(struct intel_gt *gt); +/* + * There's no unset_wedged_on_init paired with this one. + * Once we're wedged on init, there's no going back. + */ +void intel_gt_set_wedged_on_init(struct intel_gt *gt); + int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask); int intel_reset_guc(struct intel_gt *gt); @@ -68,10 +73,13 @@ void __intel_fini_wedge(struct intel_wedge_me *w); static inline bool __intel_reset_failed(const struct intel_reset *reset) { + GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ? 
+ !test_bit(I915_WEDGED, &reset->flags) : false); + return unlikely(test_bit(I915_WEDGED, &reset->flags)); } -bool intel_has_gpu_reset(struct drm_i915_private *i915); -bool intel_has_reset_engine(struct drm_i915_private *i915); +bool intel_has_gpu_reset(const struct intel_gt *gt); +bool intel_has_reset_engine(const struct intel_gt *gt); #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h index 31968356e0c0..f43bc3a0fe4f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset_types.h +++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h @@ -29,11 +29,17 @@ struct intel_reset { * we set the #I915_WEDGED bit. Prior to command submission, e.g. * i915_request_alloc(), this bit is checked and the sequence * aborted (with -EIO reported to userspace) if set. + * + * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no + * longer use the GPU - similar to #I915_WEDGED bit. The difference in + * in the way we're handling "forced" unwedged (e.g. through debugfs), + * which is not allowed in case we failed to initialize. */ unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_MODESET 1 #define I915_RESET_ENGINE 2 +#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2) #define I915_WEDGED (BITS_PER_LONG - 1) struct mutex mutex; /* serialises wedging/unwedging */ diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 601c16239fdf..bf631f15aa78 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = rq->timeline->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->hwsp_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -439,8 +440,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; @@ -930,6 +931,7 @@ static void cancel_requests(struct 
intel_engine_cs *engine) static void i9xx_submit_request(struct i915_request *request) { i915_request_submit(request); + wmb(); /* paranoid flush writes out of the WCB before mmio */ ENGINE_WRITE(request->engine, RING_TAIL, intel_ring_set_tail(request->ring, request->tail)); @@ -937,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request) static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -960,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) { int i; - GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); + GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); *cs++ = MI_FLUSH; @@ -1272,7 +1274,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct i915_vma *vma; obj = i915_gem_object_create_stolen(i915, size); - if (!obj) + if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1336,15 +1338,13 @@ void intel_ring_free(struct kref *ref) { struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - i915_vma_close(ring->vma); i915_vma_put(ring->vma); - kfree(ring); } static void __ring_context_fini(struct intel_context *ce) { - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) @@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *engine = rq->engine; enum intel_engine_id id; const int num_engines = - IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; + IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0; bool force_restore = false; int len; u32 *cs; @@ -1609,7 +1609,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) struct intel_engine_cs *signaller; *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1663,7 +1663,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) i915_reg_t last_reg = {}; /* keep gcc quiet */ *cs++ = MI_LOAD_REGISTER_IMM(num_engines); - for_each_engine(signaller, i915, id) { + for_each_engine(signaller, engine->gt, id) { if (signaller == engine) continue; @@ -1676,7 +1676,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* Insert a delay before the next switch! 
*/ *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; *cs++ = i915_mmio_reg_offset(last_reg); - *cs++ = intel_gt_scratch_offset(rq->engine->gt, + *cs++ = intel_gt_scratch_offset(engine->gt, INTEL_GT_SCRATCH_FIELD_DEFAULT); *cs++ = MI_NOOP; } @@ -1741,46 +1741,22 @@ static int remap_l3(struct i915_request *rq) static int switch_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - struct i915_address_space *vm = vm_alias(rq->hw_context); - unsigned int unwind_mm = 0; - u32 hw_flags = 0; + struct intel_context *ce = rq->hw_context; + struct i915_address_space *vm = vm_alias(ce); int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (vm) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - int loops; - - /* - * Baytail takes a little more convincing that it really needs - * to reload the PD between contexts. It is not just a little - * longer, as adding more stalls after the load_pd_dir (i.e. - * adding a long loop around flush_pd_dir) is not as effective - * as reloading the PD umpteen times. 32 is derived from - * experimentation (gem_exec_parallel/fds) and has no good - * explanation. - */ - loops = 1; - if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915)) - loops = 32; - - do { - ret = load_pd_dir(rq, ppgtt); - if (ret) - goto err; - } while (--loops); - - if (ppgtt->pd_dirty_engines & engine->mask) { - unwind_mm = engine->mask; - ppgtt->pd_dirty_engines &= ~unwind_mm; - hw_flags = MI_FORCE_RESTORE; - } + ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm)); + if (ret) + return ret; } - if (rq->hw_context->state) { - GEM_BUG_ON(engine->id != RCS0); + if (ce->state) { + u32 hw_flags; + + GEM_BUG_ON(rq->engine->id != RCS0); /* * The kernel context(s) is treated as pure scratch and is not @@ -1789,22 +1765,25 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. 
*/ + hw_flags = 0; if (i915_gem_context_is_kernel(rq->gem_context)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) - goto err_mm; + return ret; } if (vm) { + struct intel_engine_cs *engine = rq->engine; + ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = flush_pd_dir(rq); if (ret) - goto err_mm; + return ret; /* * Not only do we need a full barrier (post-sync write) after @@ -1816,24 +1795,18 @@ static int switch_context(struct i915_request *rq) */ ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) - goto err_mm; + return ret; ret = engine->emit_flush(rq, EMIT_FLUSH); if (ret) - goto err_mm; + return ret; } ret = remap_l3(rq); if (ret) - goto err_mm; + return ret; return 0; - -err_mm: - if (unwind_mm) - i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; -err: - return ret; } static int ring_request_alloc(struct i915_request *request) @@ -1841,7 +1814,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!intel_context_is_pinned(request->hw_context)); - GEM_BUG_ON(request->timeline->has_initial_breadcrumb); + GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); /* * Flush enough space to reduce the likelihood of waiting after @@ -1952,7 +1925,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, rq->timeline, total_bytes); + ret = wait_for_space(ring, + i915_request_timeline(rq), + total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 6bf2d87da109..74f793423231 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,19 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice) +{ + sseu->max_slices = max_slices; + sseu->max_subslices = max_subslices; + sseu->max_eus_per_subslice = max_eus_per_subslice; + + sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices); + GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE); + sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); + GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE); +} + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu) { @@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + int i, offset = slice * sseu->ss_stride; + u32 mask = 0; + + GEM_BUG_ON(slice >= sseu->max_slices); + + for (i = 0; i < sseu->ss_stride; i++) + mask |= (u32)sseu->subslice_mask[offset + i] << + i * BITS_PER_BYTE; + + return mask; +} + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); +} + unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) { - return hweight8(sseu->subslice_mask[slice]); + return hweight32(intel_sseu_get_subslices(sseu, slice)); } u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index b50d0401a4e2..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -10,15 +10,21 @@ #include <linux/types.h> #include 
<linux/kernel.h> +#include "i915_gem.h" + struct drm_i915_private; #define GEN_MAX_SLICES (6) /* CNL upper bound */ #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) +#define GEN_MAX_EUS (16) /* TGL upper bound */ +#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; + u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -33,11 +39,8 @@ struct sseu_dev_info { u8 max_subslices; u8 max_eus_per_subslice; - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; + u8 ss_stride; + u8 eu_stride; }; /* @@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } +static inline bool +intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + u8 mask; + int ss_idx = subslice / BITS_PER_BYTE; + + GEM_BUG_ON(ss_idx >= sseu->ss_stride); + + mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; + + return mask & BIT(subslice % BITS_PER_BYTE); +} + +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices, + u8 max_subslices, u8 max_eus_per_subslice); + unsigned int intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, + u32 ss_mask); + u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9cb01d9828f1..0f959694303c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) kfree(cl); } +__i915_active_call static void __cacheline_retire(struct i915_active *active) { struct intel_timeline_cacheline *cl = @@ -177,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); - i915_active_init(hwsp->gt->i915, &cl->active, - __cacheline_active, __cacheline_retire); + i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); return cl; } @@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -442,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. 
*/ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq); + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); if (err) goto err_cacheline; @@ -493,24 +493,39 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, static int cacheline_ref(struct intel_timeline_cacheline *cl, struct i915_request *rq) { - return i915_active_ref(&cl->active, rq->timeline, rq); + return i915_active_add_request(&cl->active, rq); } int intel_timeline_read_hwsp(struct i915_request *from, struct i915_request *to, u32 *hwsp) { - struct intel_timeline_cacheline *cl = from->hwsp_cacheline; - struct intel_timeline *tl = from->timeline; + struct intel_timeline *tl; int err; - GEM_BUG_ON(to->timeline == tl); + rcu_read_lock(); + tl = rcu_dereference(from->timeline); + if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + if (!tl) /* already completed */ + return 1; + + GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl); + + err = -EBUSY; + if (mutex_trylock(&tl->mutex)) { + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + + if (i915_request_completed(from)) { + err = 1; + goto unlock; + } - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) err = cacheline_ref(cl, to); - if (!err) { + if (err) + goto unlock; + if (likely(cl == tl->hwsp_cacheline)) { *hwsp = tl->hwsp_offset; } else { /* across a seqno wrap, recover the original offset */ @@ -518,8 +533,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES; } + +unlock: + mutex_unlock(&tl->mutex); } - mutex_unlock(&tl->mutex); + intel_timeline_put(tl); return err; } @@ -541,7 +559,7 @@ void __intel_timeline_free(struct kref *kref) container_of(kref, typeof(*timeline), kref); intel_timeline_fini(timeline); - kfree(timeline); + kfree_rcu(timeline, rcu); } static void timelines_fini(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 2b1baf2fcc8e..98d9ee166379 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -58,12 +58,13 @@ struct intel_timeline { */ struct list_head requests; - /* Contains an RCU guarded pointer to the last request. No reference is + /* + * Contains an RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. + * the request using i915_active_fence_get(), or manage the RCU + * protection themselves (cf the i915_active_fence API). 
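For reference, the consumer pattern this comment implies is the one context_sync() uses later in this patch: take a reference on the fence with i915_active_fence_get() (which may return NULL if the timeline is idle), wait on it, then drop the reference. A condensed sketch follows, with an illustrative function name and assuming the caller serialises against the timeline as the selftest does with tl->mutex:

static int wait_for_last_request(struct intel_timeline *tl)
{
        struct dma_fence *fence;
        long timeout;

        /* Acquires its own reference; no reference to the request is held */
        fence = i915_active_fence_get(&tl->last_request);
        if (!fence)
                return 0; /* nothing outstanding */

        timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
        dma_fence_put(fence);

        return timeout < 0 ? timeout : 0;
}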
*/ - struct i915_active_request last_request; + struct i915_active_fence last_request; /** * We track the most recent seqno that we wait on in every context so @@ -80,6 +81,7 @@ struct intel_timeline { struct intel_gt *gt; struct kref kref; + struct rcu_head rcu; }; #endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 45481eb1fa3c..af8a8183154a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -567,6 +567,9 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + /* Wa_1409142259:tgl */ + WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); } static void @@ -796,11 +799,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) } slice = fls(sseu->slice_mask) - 1; - GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask)); - subslice = fls(l3_en & sseu->subslice_mask[slice]); + subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); if (!subslice) { DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n", - sseu->subslice_mask[slice], l3_en); + intel_sseu_get_subslices(sseu, slice), l3_en); subslice = fls(l3_en); WARN_ON(!subslice); } @@ -890,11 +892,27 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + + /* Wa_1607087056:icl */ + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { + /* Wa_1409420604:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE2, + CPSSUNIT_CLKGATE_DIS); + + /* Wa_1409180338:tgl */ + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); } static void @@ -1063,6 +1081,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w) /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ whitelist_reg(w, GEN8_HDC_CHICKEN1); + + /* WaSendPushConstantsFromMMIO:skl,bxt */ + whitelist_reg(w, COMMON_SLICE_CHICKEN2); } static void skl_whitelist_build(struct intel_engine_cs *engine) @@ -1255,6 +1276,26 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) { + /* Wa_1606700617:tgl */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + + /* Wa_1607138336:tgl */ + wa_write_or(wal, + GEN9_CTX_PREEMPT_REG, + GEN12_DISABLE_POSH_BUSY_FF_DOP_CG); + + /* Wa_1607030317:tgl */ + /* Wa_1607186500:tgl */ + /* Wa_1607297627:tgl */ + wa_masked_en(wal, + GEN6_RC_SLEEP_PSMI_CONTROL, + GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | + GEN8_RC_SEMA_IDLE_MSG_DISABLE); + } + if (IS_GEN(i915, 11)) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -1449,7 +1490,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. 
*/ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff)) + if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) return true; return false; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 5d43cbc3f345..123db2c3f956 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -240,6 +240,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, struct mock_engine *engine; GEM_BUG_ON(id >= I915_NUM_ENGINES); + GEM_BUG_ON(!i915->gt.uncore); engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) @@ -248,9 +249,11 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, /* minimal engine setup for requests */ engine->base.i915 = i915; engine->base.gt = &i915->gt; + engine->base.uncore = i915->gt.uncore; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); engine->base.id = id; engine->base.mask = BIT(id); + engine->base.legacy_idx = INVALID_ENGINE; engine->base.instance = id; engine->base.status_page.addr = (void *)(engine + 1); @@ -265,6 +268,9 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.reset.finish = mock_reset_finish; engine->base.cancel_requests = mock_cancel_requests; + i915->gt.engine[id] = &engine->base; + i915->gt.engine_class[0][id] = &engine->base; + /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 9d1ea26c7a2d..f63a26a3e620 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -14,22 +14,28 @@ static int request_sync(struct i915_request *rq) { + struct intel_timeline *tl = i915_request_timeline(rq); long timeout; int err = 0; + intel_timeline_get(tl); i915_request_get(rq); - i915_request_add(rq); + /* Opencode i915_request_add() so we can keep the timeline locked. */ + __i915_request_commit(rq); + __i915_request_queue(rq, NULL); + timeout = i915_request_wait(rq, 0, HZ / 10); - if (timeout < 0) { + if (timeout < 0) err = timeout; - } else { - mutex_lock(&rq->timeline->mutex); + else i915_request_retire_upto(rq); - mutex_unlock(&rq->timeline->mutex); - } + + lockdep_unpin_lock(&tl->mutex, rq->cookie); + mutex_unlock(&tl->mutex); i915_request_put(rq); + intel_timeline_put(tl); return err; } @@ -41,24 +47,20 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct i915_request *rq; + struct dma_fence *fence; long timeout; - rcu_read_lock(); - rq = rcu_dereference(tl->last_request.request); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) break; - timeout = i915_request_wait(rq, 0, HZ / 10); + timeout = dma_fence_wait_timeout(fence, false, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(rq); + i915_request_retire_upto(to_request(fence)); - i915_request_put(rq); + dma_fence_put(fence); } while (!err); mutex_unlock(&tl->mutex); @@ -153,15 +155,11 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. 
*/ - mutex_lock(>->i915->drm.struct_mutex); - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto unlock; - } + if (IS_ERR(fixme)) + return PTR_ERR(fixme); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct { struct drm_i915_gem_object *state; void *pinned; @@ -199,8 +197,6 @@ static int live_context_size(void *arg) } kernel_context_close(fixme); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -303,26 +299,23 @@ static int live_active_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_active_context(engine, fixme); if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } @@ -416,26 +409,23 @@ static int live_remote_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = __live_remote_context(engine, fixme); if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } -unlock: - mutex_unlock(>->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 3a1419376912..20b9c83f43ad 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -25,7 +25,7 @@ static int live_engine_pm(void *arg) } GEM_BUG_ON(intel_gt_pm_is_awake(gt)); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { const typeof(*igt_atomic_phases) *p; for (p = igt_atomic_phases; p->name; p++) { diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c new file mode 100644 index 000000000000..5d429037cdad --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -0,0 +1,59 @@ + +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "selftest_llc.h" + +static int live_gt_resume(void *arg) +{ + struct intel_gt *gt = arg; + IGT_TIMEOUT(end_time); + int err; + + /* Do several suspend/resume cycles to check we don't explode! 
*/ + do { + intel_gt_suspend(gt); + + if (gt->rc6.enabled) { + pr_err("rc6 still enabled after suspend!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = intel_gt_resume(gt); + if (err) + break; + + if (gt->rc6.supported && !gt->rc6.enabled) { + pr_err("rc6 not enabled upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + err = -EINVAL; + break; + } + + err = st_llc_verify(>->llc); + if (err) { + pr_err("llc state not restored upon resume!\n"); + intel_gt_set_wedged_on_init(gt); + break; + } + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +int intel_gt_pm_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_gt_resume), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index a0098fc35921..8e0016464325 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -131,7 +131,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -141,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) int err; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + i915_vm_put(vm); return ERR_CAST(obj); + } vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); + i915_vm_put(vm); return ERR_CAST(vaddr); } @@ -157,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_vm_put(vm); return ERR_CAST(vma); + } hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) + if (IS_ERR(hws)) { + i915_vm_put(vm); return ERR_CAST(hws); + } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) + if (err) { + i915_vm_put(vm); return ERR_PTR(err); + } err = i915_vma_pin(hws, 0, 0, PIN_USER); if (err) @@ -264,6 +273,7 @@ unpin_hws: i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); + i915_vm_put(vm); return err ? 
ERR_PTR(err) : rq; } @@ -285,7 +295,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -309,12 +319,11 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct intel_wedge_me w; long timeout; @@ -355,8 +364,6 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -383,9 +390,7 @@ static int igt_reset_nop(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -395,9 +400,7 @@ static int igt_reset_nop(void *arg) reset_count = i915_reset_count(global); count = 0; do { - mutex_lock(>->i915->drm.struct_mutex); - - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { int i; for (i = 0; i < 16; i++) { @@ -417,7 +420,6 @@ static int igt_reset_nop(void *arg) intel_gt_reset(gt, ALL_ENGINES, NULL); igt_global_reset_unlock(gt); - mutex_unlock(>->i915->drm.struct_mutex); if (intel_gt_is_wedged(gt)) { err = -EIO; break; @@ -429,16 +431,13 @@ static int igt_reset_nop(void *arg) break; } - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -458,23 +457,21 @@ static int igt_reset_nop_engine(void *arg) /* Check that we can engine-reset during non-user portions */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; file = mock_file(gt->i915); if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } i915_gem_context_clear_bannable(ctx); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; unsigned int count; IGT_TIMEOUT(end_time); @@ -494,7 +491,6 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -507,7 +503,6 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } err = intel_engine_reset(engine, NULL); - mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -533,15 +528,12 @@ static int igt_reset_nop_engine(void *arg) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -559,18 +551,16 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) /* Check that we can issue an 
engine reset on an idle engine (no-op) */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (active) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { unsigned int reset_count, reset_engine_count; IGT_TIMEOUT(end_time); @@ -593,17 +583,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (active) { struct i915_request *rq; - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -647,7 +634,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } @@ -655,11 +642,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (intel_gt_is_wedged(gt)) err = -EIO; - if (active) { - mutex_lock(>->i915->drm.struct_mutex); + if (active) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -725,9 +709,7 @@ static int active_engine(void *data) return PTR_ERR(file); for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); if (IS_ERR(ctx[count])) { err = PTR_ERR(ctx[count]); while (--count) @@ -741,10 +723,8 @@ static int active_engine(void *data) struct i915_request *old = rq[idx]; struct i915_request *new; - mutex_lock(&engine->i915->drm.struct_mutex); new = igt_request_alloc(ctx[idx], engine); if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } @@ -755,7 +735,6 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); err = active_request_put(old); if (err) @@ -791,13 +770,11 @@ static int __igt_reset_engines(struct intel_gt *gt, * with any other engine. 
*/ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -805,7 +782,7 @@ static int __igt_reset_engines(struct intel_gt *gt, h.ctx->sched.priority = 1024; } - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long device = i915_reset_count(global); unsigned long count = 0, reported; @@ -823,7 +800,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } memset(threads, 0, sizeof(threads)); - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { struct task_struct *tsk; threads[tmp].resets = @@ -855,17 +832,14 @@ static int __igt_reset_engines(struct intel_gt *gt, struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -940,7 +914,7 @@ static int __igt_reset_engines(struct intel_gt *gt, } unwind: - for_each_engine(other, gt->i915, tmp) { + for_each_engine(other, gt, tmp) { int ret; if (!threads[tmp].task) @@ -977,9 +951,7 @@ unwind: if (err) break; - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915); if (err) break; } @@ -987,11 +959,8 @@ unwind: if (intel_gt_is_wedged(gt)) err = -EIO; - if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); + if (flags & TEST_ACTIVE) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -1061,7 +1030,6 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1109,7 +1077,6 @@ out_rq: fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1127,15 +1094,14 @@ static int evict_vma(void *data) { struct evict_vma *arg = data; struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; struct drm_mm_node evict = arg->vma->node; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&vm->mutex); err = i915_gem_evict_for_node(vm, &evict, 0); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); return err; } @@ -1143,39 +1109,33 @@ static int evict_vma(void *data) static int evict_fence(void *data) { struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); - /* Mark the fence register as dirty to force the mmio update. 
*/ err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); if (err) { pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); if (err) { pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin_fence(arg->vma); i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } i915_vma_unpin_fence(arg->vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return 0; } static int __igt_reset_evict_vma(struct intel_gt *gt, @@ -1196,10 +1156,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { @@ -1262,8 +1221,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (err) goto out_rq; - mutex_unlock(>->i915->drm.struct_mutex); - if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1312,16 +1269,12 @@ out_reset: put_task_struct(tsk); } - mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: i915_gem_object_put(obj); fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); - if (intel_gt_is_wedged(gt)) return -EIO; @@ -1340,6 +1293,7 @@ static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; struct i915_gem_context *ctx; + struct i915_address_space *vm; struct drm_file *file; int err; @@ -1347,18 +1301,20 @@ static int igt_reset_evict_ppgtt(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(>->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(>->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } err = 0; - if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, ctx->vm, + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_is_ggtt(vm)) { + /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(gt, vm, evict_vma, EXEC_OBJECT_WRITE); + } + i915_vm_put(vm); out: mock_file_free(gt->i915, file); @@ -1379,7 +1335,7 @@ static int wait_for_others(struct intel_gt *gt, struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { if (engine == exclude) continue; @@ -1403,12 +1359,11 @@ static int igt_reset_queue(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *prev; IGT_TIMEOUT(end_time); unsigned int count; @@ -1518,7 +1473,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -1526,7 +1481,6 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1547,17 +1501,15 @@ static int igt_handle_error(void *arg) /* Check that we can issue a global GPU and engine reset */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (!engine || 
!intel_engine_can_store_dword(engine)) return 0; - mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, gt); if (err) - goto err_unlock; + return err; rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -1581,8 +1533,6 @@ static int igt_handle_error(void *arg) goto err_request; } - mutex_unlock(>->i915->drm.struct_mutex); - /* Temporarily disable error capture */ error = xchg(&global->first_error, (void *)-1); @@ -1590,8 +1540,6 @@ static int igt_handle_error(void *arg) xchg(&global->first_error, error); - mutex_lock(>->i915->drm.struct_mutex); - if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); err = -EINVAL; @@ -1602,8 +1550,6 @@ err_request: i915_request_put(rq); err_fini: hang_fini(&h); -err_unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1689,14 +1635,13 @@ static int igt_reset_engines_atomic(void *arg) /* Check that the engines resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) return 0; igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ if (!igt_force_reset(gt)) @@ -1706,7 +1651,7 @@ static int igt_reset_engines_atomic(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { err = igt_atomic_reset_engine(engine, p); if (err) goto out; @@ -1716,9 +1661,7 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. */ igt_force_reset(gt); - unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); return err; @@ -1746,24 +1689,20 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) bool saved_hangcheck; int err; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) return -EIO; /* we're long past hope of a successful reset */ - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); drain_delayed_work(>->hangcheck.work); /* flush param */ err = intel_gt_live_subtests(tests, gt); - mutex_lock(>->i915->drm.struct_mutex); - igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - i915_modparams.enable_hangcheck = saved_hangcheck; - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c new file mode 100644 index 000000000000..a7057785e420 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.c @@ -0,0 +1,77 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "intel_pm.h" /* intel_gpu_freq() */ +#include "selftest_llc.h" + +static int gen6_verify_ring_freq(struct intel_llc *llc) +{ + struct drm_i915_private *i915 = llc_to_gt(llc)->i915; + struct ia_constants consts; + intel_wakeref_t wakeref; + unsigned int gpu_freq; + int err = 0; + + wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm); + + if (!get_ia_constants(llc, &consts)) { + err = -ENODEV; + goto out_rpm; + } + + for (gpu_freq = consts.min_gpu_freq; + gpu_freq <= consts.max_gpu_freq; + gpu_freq++) { + unsigned int ia_freq, ring_freq, found; + u32 val; + + calc_ia_freq(llc, gpu_freq, &consts, 
&ia_freq, &ring_freq); + + val = gpu_freq; + if (sandybridge_pcode_read(i915, + GEN6_PCODE_READ_MIN_FREQ_TABLE, + &val, NULL)) { + pr_err("Failed to read freq table[%d], range [%d, %d]\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq); + err = -ENXIO; + break; + } + + found = (val >> 0) & 0xff; + if (found != ia_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ia_freq); + err = -EINVAL; + break; + } + + found = (val >> 8) & 0xff; + if (found != ring_freq) { + pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n", + gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq, + intel_gpu_freq(i915, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)), + found, ring_freq); + err = -EINVAL; + break; + } + } + +out_rpm: + intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref); + return err; +} + +int st_llc_verify(struct intel_llc *llc) +{ + int err = 0; + + if (HAS_LLC(llc_to_gt(llc)->i915)) + err = gen6_verify_ring_freq(llc); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h new file mode 100644 index 000000000000..873f896e72f2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_llc.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef SELFTEST_LLC_H +#define SELFTEST_LLC_H + +struct intel_llc; + +int st_llc_verify(struct intel_llc *llc); + +#endif /* SELFTEST_LLC_H */ diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index d791158988d6..5dc679781a08 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -19,26 +19,52 @@ #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */ + +static struct i915_vma *create_scratch(struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + return vma; +} + static int live_sanitycheck(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; struct igt_spinner spin; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) goto err_spin; @@ -55,13 +81,13 @@ static int live_sanitycheck(void *arg) if (!igt_wait_for_spinner(&spin, rq)) { GEM_TRACE("spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + 
intel_gt_set_wedged(gt); err = -EIO; goto err_ctx; } igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx; } @@ -73,12 +99,180 @@ err_ctx: kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } +static int live_unlite_restore(struct intel_gt *gt, int prio) +{ + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + struct igt_spinner spin; + int err = -ENOMEM; + + /* + * Check that we can correctly context switch between 2 instances + * on the same engine from the same parent context. + */ + + if (igt_spinner_init(&spin, gt)) + return err; + + ctx = kernel_context(gt->i915); + if (!ctx) + goto err_spin; + + err = 0; + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq[2]; + struct igt_live_test t; + int n; + + if (prio && !intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(ctx, engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + /* + * Setup the pair of contexts such that if we + * lite-restore using the RING_TAIL from ce[1] it + * will execute garbage from ce[0]->ring. + */ + memset(tmp->ring->vaddr, + POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ + tmp->ring->vma->size); + + ce[n] = tmp; + } + GEM_BUG_ON(!ce[1]->ring->size); + intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + + local_irq_disable(); /* appease lockdep */ + __context_pin_acquire(ce[1]); + __execlists_update_reg_state(ce[1], engine); + __context_pin_release(ce[1]); + local_irq_enable(); + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_ce; + } + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); + + if (!igt_wait_for_spinner(&spin, rq[0])) { + i915_request_put(rq[0]); + goto err_ce; + } + + rq[1] = i915_request_create(ce[1]); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + i915_request_put(rq[0]); + goto err_ce; + } + + if (!prio) { + /* + * Ensure we do the switch to ce[1] on completion. + * + * rq[0] is already submitted, so this should reduce + * to a no-op (a wait on a request on the same engine + * uses the submit fence, not the completion fence), + * but it will install a dependency on rq[1] for rq[0] + * that will prevent the pair being reordered by + * timeslicing. 
+ */ + i915_request_await_dma_fence(rq[1], &rq[0]->fence); + } + + i915_request_get(rq[1]); + i915_request_add(rq[1]); + GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); + i915_request_put(rq[0]); + + if (prio) { + struct i915_sched_attr attr = { + .priority = prio, + }; + + /* Alternatively preempt the spinner with ce[1] */ + engine->schedule(rq[1], &attr); + } + + /* And switch back to ce[0] for good measure */ + rq[0] = i915_request_create(ce[0]); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + i915_request_put(rq[1]); + goto err_ce; + } + + i915_request_await_dma_fence(rq[0], &rq[1]->fence); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + +err_ce: + tasklet_kill(&engine->execlists.tasklet); /* flush submission */ + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + kernel_context_close(ctx); +err_spin: + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_switch(void *arg) +{ + return live_unlite_restore(arg, 0); +} + +static int live_unlite_preempt(void *arg) +{ + return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); +} + static int emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) { @@ -131,7 +325,13 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) if (IS_ERR(rq)) goto out_ctx; - err = emit_semaphore_chain(rq, vma, idx); + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = emit_semaphore_chain(rq, vma, idx); + if (err == 0) + i915_request_get(rq); i915_request_add(rq); if (err) rq = ERR_PTR(err); @@ -144,10 +344,10 @@ out_ctx: static int release_queue(struct intel_engine_cs *engine, struct i915_vma *vma, - int idx) + int idx, int prio) { struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + .priority = prio, }; struct i915_request *rq; u32 *cs; @@ -168,9 +368,15 @@ release_queue(struct intel_engine_cs *engine, *cs++ = 1; intel_ring_advance(rq, cs); + + i915_request_get(rq); i915_request_add(rq); + local_bh_disable(); engine->schedule(rq, &attr); + local_bh_enable(); /* kick tasklet */ + + i915_request_put(rq); return 0; } @@ -189,8 +395,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, if (IS_ERR(head)) return PTR_ERR(head); - i915_request_get(head); - for_each_engine(engine, outer->i915, id) { + for_each_engine(engine, outer->gt, id) { for (i = 0; i < count; i++) { struct i915_request *rq; @@ -199,15 +404,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer, err = PTR_ERR(rq); goto out; } + + i915_request_put(rq); } } - err = release_queue(outer, vma, n); + err = release_queue(outer, vma, n, INT_MAX); if (err) goto out; - if (i915_request_wait(head, - I915_WAIT_LOCKED, + if (i915_request_wait(head, 0, 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); @@ -223,9 +429,8 @@ out: static int live_timeslice_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; struct i915_vma *vma; void *vaddr; int err = 0; @@ -240,16 +445,11 @@ static int live_timeslice_preempt(void *arg) * ready task. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } - - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -269,7 +469,7 @@ static int live_timeslice_preempt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_has_preemption(engine)) continue; @@ -279,7 +479,7 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_pin; } @@ -292,22 +492,161 @@ err_map: i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static void wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + do { + cond_resched(); + intel_engine_flush_submission(engine); + } while (!i915_request_is_active(rq)); +} + +static int live_timeslice_queue(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * Make sure that even if ELSP[0] and ELSP[1] are filled with + * timeslicing between them disabled, we *do* enable timeslicing + * if the queue demands it. (Normally, we do not submit if + * ELSP[1] is already occupied, so must rely on timeslicing to + * eject ELSP[0] in favour of the queue.) 
+ */ + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq, *nop; + + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + /* ELSP[0]: semaphore wait */ + rq = semaphore_queue(engine, vma, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + engine->schedule(rq, &attr); + wait_for_submit(engine, rq); + + /* ELSP[1]: nop request */ + nop = nop_request(engine); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + i915_request_put(rq); + goto err_pin; + } + wait_for_submit(engine, nop); + i915_request_put(nop); + + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Queue: semaphore signal, matching priority as semaphore */ + err = release_queue(engine, vma, 1, effective_prio(rq)); + if (err) { + i915_request_put(rq); + goto err_pin; + } + + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.timer.expires) && + !i915_request_completed(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + GEM_TRACE_DUMP(); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EINVAL; + } + + /* Timeslice every jiffie, so within 2 we should signal */ + if (i915_request_wait(rq, 0, 3) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to timeslice into queue\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EIO; + } + i915_request_put(rq); + if (err) + break; + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); return err; } static int live_busywait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; u32 *map; @@ -316,22 +655,19 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) - goto err_unlock; + return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); goto err_ctx_lo; @@ -343,7 +679,7 @@ static int live_busywait_preempt(void *arg) goto err_obj; } - vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_map; @@ -353,7 +689,7 @@ static int live_busywait_preempt(void *arg) if (err) goto err_map; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *lo, *hi; struct igt_live_test t; u32 *cs; @@ -364,7 +700,7 @@ static int live_busywait_preempt(void *arg) if (!intel_engine_can_store_dword(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_vma; } @@ -444,7 +780,7 @@ static int live_busywait_preempt(void *arg) i915_request_add(hi); if (i915_request_wait(lo, 0, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(i915->drm.dev); + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); pr_err("%s: Failed to preempt semaphore busywait!\n", engine->name); @@ -452,7 +788,7 @@ static int live_busywait_preempt(void *arg) intel_engine_dump(engine, &p, "%s\n", engine->name); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_vma; } @@ -475,9 +811,6 @@ err_ctx_lo: kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -501,49 +834,45 @@ spinner_create_request(struct igt_spinner *spin, static int live_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) + if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct 
igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -559,7 +888,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -576,7 +905,7 @@ static int live_preempt(void *arg) if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -599,54 +928,47 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static int live_late_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ ctx_lo->sched.priority = I915_USER_PRIORITY(1); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct igt_live_test t; struct i915_request *rq; if (!intel_engine_has_preemption(engine)) continue; - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_ctx_lo; } @@ -705,15 +1027,12 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -723,14 +1042,13 @@ struct preempt_client { struct i915_gem_context *ctx; }; -static int preempt_client_init(struct drm_i915_private *i915, - struct preempt_client *c) +static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) { - c->ctx = kernel_context(i915); + c->ctx = kernel_context(gt->i915); if (!c->ctx) return -ENOMEM; - if (igt_spinner_init(&c->spin, &i915->gt)) + if (igt_spinner_init(&c->spin, gt)) goto err_ctx; return 0; @@ -748,11 +1066,10 @@ static void preempt_client_fini(struct preempt_client *c) static int live_nopreempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -760,19 +1077,16 @@ static int live_nopreempt(void *arg) * that may be being observed and not want to be interrupted. */ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; if (!intel_engine_has_preemption(engine)) @@ -832,7 +1146,7 @@ static int live_nopreempt(void *arg) goto err_wedged; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -841,29 +1155,25 @@ err_client_b: preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } static int live_suppress_self_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) }; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -873,30 +1183,31 @@ static int live_suppress_self_preempt(void *arg) * completion event. 
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; /* presume black blox */ - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(gt->i915)) return 0; /* GVT forces single port & request submission */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &a)) - goto err_unlock; - if (preempt_client_init(i915, &b)) + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) goto err_client_a; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; int depth; if (!intel_engine_has_preemption(engine)) continue; + if (igt_flush_test(gt->i915)) + goto err_wedged; + + intel_engine_pm_get(engine); engine->execlists.preempt_hang.count = 0; rq_a = spinner_create_request(&a.spin, @@ -904,12 +1215,14 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_a)) { err = PTR_ERR(rq_a); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_a); if (!igt_wait_for_spinner(&a.spin, rq_a)) { pr_err("First client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -921,6 +1234,7 @@ static int live_suppress_self_preempt(void *arg) MI_NOOP); if (IS_ERR(rq_b)) { err = PTR_ERR(rq_b); + intel_engine_pm_put(engine); goto err_client_b; } i915_request_add(rq_b); @@ -931,6 +1245,7 @@ static int live_suppress_self_preempt(void *arg) if (!igt_wait_for_spinner(&b.spin, rq_b)) { pr_err("Second client failed to start\n"); + intel_engine_pm_put(engine); goto err_wedged; } @@ -944,11 +1259,13 @@ static int live_suppress_self_preempt(void *arg) engine->name, engine->execlists.preempt_hang.count, depth); + intel_engine_pm_put(engine); err = -EINVAL; goto err_client_b; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) goto err_wedged; } @@ -957,15 +1274,12 @@ err_client_b: preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&b.spin); igt_spinner_end(&a.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_b; } @@ -984,9 +1298,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) if (!rq) return NULL; - INIT_LIST_HEAD(&rq->active_list); rq->engine = engine; + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->fence.cb_list); + rq->fence.lock = &rq->lock; + rq->fence.ops = &i915_fence_ops; + i915_sched_node_init(&rq->sched); /* mark this request as permanently incomplete */ @@ -1021,11 +1339,10 @@ static void dummy_request_free(struct i915_request *dummy) static int live_suppress_wait_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct preempt_client client[4]; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; int i; @@ -1035,22 +1352,19 @@ static int live_suppress_wait_preempt(void *arg) * not needlessly generate preempt-to-idle cycles. 
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; - if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ + if (preempt_client_init(gt, &client[0])) /* ELSP[0] */ + return -ENOMEM; + if (preempt_client_init(gt, &client[1])) /* ELSP[1] */ goto err_client_0; - if (preempt_client_init(i915, &client[2])) /* head of queue */ + if (preempt_client_init(gt, &client[2])) /* head of queue */ goto err_client_1; - if (preempt_client_init(i915, &client[3])) /* bystander */ + if (preempt_client_init(gt, &client[3])) /* bystander */ goto err_client_2; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { int depth; if (!intel_engine_has_preemption(engine)) @@ -1079,8 +1393,8 @@ static int live_suppress_wait_preempt(void *arg) } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&rq[i]->timeline->last_request, - dummy); + __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + &dummy->fence); i915_request_add(rq[i]); } @@ -1105,7 +1419,7 @@ static int live_suppress_wait_preempt(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -1128,26 +1442,22 @@ err_client_1: preempt_client_fini(&client[1]); err_client_0: preempt_client_fini(&client[0]); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_3; } static int live_chain_preempt(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1156,19 +1466,16 @@ static int live_chain_preempt(void *arg) * the previously submitted spinner in B. 
*/ - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (preempt_client_init(i915, &hi)) - goto err_unlock; + if (preempt_client_init(gt, &hi)) + return -ENOMEM; - if (preempt_client_init(i915, &lo)) + if (preempt_client_init(gt, &lo)) goto err_client_hi; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_sched_attr attr = { .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), }; @@ -1199,7 +1506,7 @@ static int live_chain_preempt(void *arg) goto err_wedged; } - if (igt_live_test_begin(&t, i915, __func__, engine->name)) { + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { err = -EIO; goto err_wedged; } @@ -1237,7 +1544,7 @@ static int live_chain_preempt(void *arg) igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to preempt over chain of %d\n", count); @@ -1253,7 +1560,7 @@ static int live_chain_preempt(void *arg) i915_request_add(rq); if (i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = - drm_info_printer(i915->drm.dev); + drm_info_printer(gt->i915->drm.dev); pr_err("Failed to flush low priority chain of %d requests\n", count); @@ -1274,57 +1581,50 @@ err_client_lo: preempt_client_fini(&lo); err_client_hi: preempt_client_fini(&hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: igt_spinner_end(&hi.spin); igt_spinner_end(&lo.spin); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_client_lo; } static int live_preempt_hang(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; - if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) return 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; - if (igt_spinner_init(&spin_lo, &i915->gt)) + if (igt_spinner_init(&spin_lo, gt)) goto err_spin_hi; - ctx_hi = kernel_context(i915); + ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - ctx_lo = kernel_context(i915); + ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; ctx_lo->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_request *rq; if (!intel_engine_has_preemption(engine)) @@ -1341,7 +1641,7 @@ static int live_preempt_hang(void *arg) if (!igt_wait_for_spinner(&spin_lo, rq)) { GEM_TRACE("lo spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } @@ -1363,28 +1663,28 @@ static int live_preempt_hang(void *arg) HZ / 10)) { pr_err("Preemption did not occur within timeout!"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } - 
set_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + set_bit(I915_RESET_ENGINE + id, >->reset.flags); intel_engine_reset(engine, NULL); - clear_bit(I915_RESET_ENGINE + id, &i915->gt.reset.flags); + clear_bit(I915_RESET_ENGINE + id, >->reset.flags); engine->execlists.preempt_hang.inject_hang = false; if (!igt_wait_for_spinner(&spin_hi, rq)) { GEM_TRACE("hi spinner failed to start\n"); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto err_ctx_lo; } igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(gt->i915)) { err = -EIO; goto err_ctx_lo; } @@ -1399,9 +1699,6 @@ err_spin_lo: igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1416,7 +1713,7 @@ static int random_priority(struct rnd_state *rnd) } struct preempt_smoke { - struct drm_i915_private *i915; + struct intel_gt *gt; struct i915_gem_context **contexts; struct intel_engine_cs *engine; struct drm_i915_gem_object *batch; @@ -1440,7 +1737,11 @@ static int smoke_submit(struct preempt_smoke *smoke, int err = 0; if (batch) { - vma = i915_vma_instance(batch, ctx->vm, NULL); + struct i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -1489,11 +1790,9 @@ static int smoke_crescendo_thread(void *arg) struct i915_gem_context *ctx = smoke_context(smoke); int err; - mutex_lock(&smoke->i915->drm.struct_mutex); err = smoke_submit(smoke, ctx, count % I915_PRIORITY_MAX, smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); if (err) return err; @@ -1514,9 +1813,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - mutex_unlock(&smoke->i915->drm.struct_mutex); - - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { arg[id] = *smoke; arg[id].engine = engine; if (!(flags & BATCH)) @@ -1533,7 +1830,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) } count = 0; - for_each_engine(engine, smoke->i915, id) { + for_each_engine(engine, smoke->gt, id) { int status; if (IS_ERR_OR_NULL(tsk[id])) @@ -1548,11 +1845,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) put_task_struct(tsk[id]); } - mutex_lock(&smoke->i915->drm.struct_mutex); - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } @@ -1564,7 +1859,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) count = 0; do { - for_each_engine(smoke->engine, smoke->i915, id) { + for_each_engine(smoke->engine, smoke->gt, id) { struct i915_gem_context *ctx = smoke_context(smoke); int err; @@ -1580,25 +1875,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", count, flags, - RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); + RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext); return 0; } static int live_preempt_smoke(void *arg) { struct preempt_smoke smoke = { - .i915 = arg, + .gt = arg, .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), .ncontext 
= 1024, }; const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; struct igt_live_test t; int err = -ENOMEM; u32 *cs; int n; - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915)) + if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) return 0; smoke.contexts = kmalloc_array(smoke.ncontext, @@ -1607,13 +1901,11 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm); - - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); + smoke.batch = + i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); - goto err_unlock; + goto err_free; } cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); @@ -1627,13 +1919,13 @@ static int live_preempt_smoke(void *arg) i915_gem_object_flush_map(smoke.batch); i915_gem_object_unpin_map(smoke.batch); - if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) { + if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { err = -EIO; goto err_batch; } for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.i915); + smoke.contexts[n] = kernel_context(smoke.gt->i915); if (!smoke.contexts[n]) goto err_ctx; } @@ -1660,15 +1952,13 @@ err_ctx: err_batch: i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); +err_free: kfree(smoke.contexts); return err; } -static int nop_virtual_engine(struct drm_i915_private *i915, +static int nop_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling, unsigned int nctx, @@ -1687,7 +1977,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); for (n = 0; n < nctx; n++) { - ctx[n] = kernel_context(i915); + ctx[n] = kernel_context(gt->i915); if (!ctx[n]) { err = -ENOMEM; nctx = n; @@ -1712,7 +2002,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, } } - err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); if (err) goto out; @@ -1759,7 +2049,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, request[nc]->fence.context, request[nc]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); break; } } @@ -1781,7 +2071,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { @@ -1794,25 +2084,22 @@ out: static int live_virtual_engine(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_engine_cs *engine; - struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; - int err = -ENODEV; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - - for_each_engine(engine, i915, id) { - err = nop_virtual_engine(i915, &engine, 1, 1, 0); + for_each_engine(engine, gt, id) { + err = nop_virtual_engine(gt, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); - goto out_unlock; + return err; } } @@ -1830,23 +2117,21 @@ static int live_virtual_engine(void *arg) 
continue; for (n = 1; n <= nsibling + 1; n++) { - err = nop_virtual_engine(i915, siblings, nsibling, + err = nop_virtual_engine(gt, siblings, nsibling, n, 0); if (err) - goto out_unlock; + return err; } - err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); + err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } -static int mask_virtual_engine(struct drm_i915_private *i915, +static int mask_virtual_engine(struct intel_gt *gt, struct intel_engine_cs **siblings, unsigned int nsibling) { @@ -1862,7 +2147,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, * restrict it to our desired engine within the virtual engine. */ - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; @@ -1876,7 +2161,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, if (err) goto out_put; - err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); if (err) goto out_unpin; @@ -1907,7 +2192,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, request[n]->fence.context, request[n]->fence.seqno); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); err = -EIO; goto out; } @@ -1922,11 +2207,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915, } err = igt_live_test_end(&t); - if (err) - goto out; - out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -1943,17 +2225,14 @@ out_close: static int live_virtual_mask(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; @@ -1967,17 +2246,166 @@ static int live_virtual_mask(void *arg) if (nsibling < 2) continue; - err = mask_virtual_engine(i915, siblings, nsibling); + err = mask_virtual_engine(gt, siblings, nsibling); if (err) - goto out_unlock; + return err; + } + + return 0; +} + +static int preserved_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *last = NULL; + struct i915_gem_context *ctx; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + unsigned int n; + int err = 0; + u32 *cs; + + ctx = kernel_context(gt->i915); + if (!ctx) + return -ENOMEM; + + scratch = create_scratch(siblings[0]->gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < NUM_GPR_DW; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + 
i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_end; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(ctx); return err; } -static int bond_virtual_engine(struct drm_i915_private *i915, +static int live_virtual_preserved(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (USES_GUC_SUBMISSION(gt->i915)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(gt->i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + siblings[nsibling++] = gt->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int bond_virtual_engine(struct intel_gt *gt, unsigned int class, struct intel_engine_cs **siblings, unsigned int nsibling, @@ -1993,13 +2421,13 @@ static int bond_virtual_engine(struct drm_i915_private *i915, GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (!ctx) return -ENOMEM; err = 0; rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, i915, id) { + for_each_engine(master, gt, id) { struct i915_sw_fence fence = {}; if (master->class == class) @@ -2104,7 +2532,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; kernel_context_close(ctx); @@ -2121,17 +2549,14 @@ static int live_virtual_bond(void *arg) { "schedule", BOND_SCHEDULE }, { }, }; - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; - if (USES_GUC_SUBMISSION(i915)) + if (USES_GUC_SUBMISSION(gt->i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { const struct phase *p; int nsibling; @@ -2148,27 +2573,28 @@ static int 
live_virtual_bond(void *arg) continue; for (p = phases; p->name; p++) { - err = bond_virtual_engine(i915, + err = bond_virtual_engine(gt, class, siblings, nsibling, p->flags); if (err) { pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", __func__, p->name, class, nsibling, err); - goto out_unlock; + return err; } } } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), + SUBTEST(live_unlite_switch), + SUBTEST(live_unlite_preempt), SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_queue), SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), @@ -2180,6 +2606,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt_smoke), SUBTEST(live_virtual_engine), SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), SUBTEST(live_virtual_bond), }; @@ -2189,5 +2616,417 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); +} + +static void hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +static int live_lrc_layout(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + u32 *mem; + int err; + + /* + * Check the registers offsets we use to create the initial reg state + * match the layout saved by HW. + */ + + mem = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!mem) + return -ENOMEM; + + err = 0; + for_each_engine(engine, gt, id) { + u32 *hw, *lrc; + int dw; + + if (!engine->default_state) + continue; + + hw = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (IS_ERR(hw)) { + err = PTR_ERR(hw); + break; + } + hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw); + + lrc = memset(mem, 0, PAGE_SIZE); + execlists_init_reg_state(lrc, + engine->kernel_context, + engine, + engine->kernel_context->ring, + true); + + dw = 0; + do { + u32 lri = hw[dw]; + + if (lri == 0) { + dw++; + continue; + } + + if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) { + pr_err("%s: Expected LRI command at dword %d, found %08x\n", + engine->name, dw, lri); + err = -EINVAL; + break; + } + + if (lrc[dw] != lri) { + pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n", + engine->name, dw, lri, lrc[dw]); + err = -EINVAL; + break; + } + + lri &= 0x7f; + lri++; + dw++; + + while (lri) { + if (hw[dw] != lrc[dw]) { + pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", + engine->name, dw, hw[dw], lrc[dw]); + err = -EINVAL; + break; + } + + /* + * Skip over the actual register value as we + * expect that to differ. 
+ */ + dw += 2; + lri -= 2; + } + } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + + if (err) { + pr_info("%s: HW register image:\n", engine->name); + hexdump(hw, PAGE_SIZE); + + pr_info("%s: SW register image:\n", engine->name); + hexdump(lrc, PAGE_SIZE); + } + + i915_gem_object_unpin_map(engine->default_state); + if (err) + break; + } + + kfree(mem); + return err; +} + +static int __live_lrc_state(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + enum { + RING_START_IDX = 0, + RING_TAIL_IDX, + MAX_IDX + }; + u32 expected[MAX_IDX]; + u32 *cs; + int err; + int n; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) + goto err_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + cs = intel_ring_begin(rq, 4 * MAX_IDX); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_unpin; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32); + *cs++ = 0; + + expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma); + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)); + *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32); + *cs++ = 0; + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + expected[RING_TAIL_IDX] = ce->ring->tail; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < MAX_IDX; n++) { + if (cs[n] != expected[n]) { + pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n", + engine->name, n, cs[n], expected[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_unpin: + intel_context_unpin(ce); +err_put: + intel_context_put(ce); + return err; +} + +static int live_lrc_state(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check the live register state matches what we expect for this + * intel_context. 
+ */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt, id) { + err = __live_lrc_state(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + +static int gpr_make_dirty(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + u32 *cs; + int n; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW); + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = CS_GPR(engine, n); + *cs++ = STACK_MAGIC; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + i915_request_add(rq); + + return 0; +} + +static int __live_gpr_clear(struct i915_gem_context *fixme, + struct intel_engine_cs *engine, + struct i915_vma *scratch) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 *cs; + int err; + int n; + + if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS) + return 0; /* GPR only on rcs0 for gen8 */ + + err = gpr_make_dirty(engine); + if (err) + return err; + + ce = intel_context_create(fixme, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_put; + } + + cs = intel_ring_begin(rq, 4 * NUM_GPR_DW); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(rq); + goto err_put; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -ETIME; + goto err_rq; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_rq; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n]) { + pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n", + engine->name, + n / 2, n & 1 ? "udw" : "ldw", + cs[n]); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +err_rq: + i915_request_put(rq); +err_put: + intel_context_put(ce); + return err; +} + +static int live_gpr_clear(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *fixme; + struct i915_vma *scratch; + enum intel_engine_id id; + int err = 0; + + /* + * Check that GPR registers are cleared in new contexts as we need + * to avoid leaking any information from previous contexts. 
+ */ + + fixme = kernel_context(gt->i915); + if (!fixme) + return -ENOMEM; + + scratch = create_scratch(gt); + if (IS_ERR(scratch)) { + err = PTR_ERR(scratch); + goto out_close; + } + + for_each_engine(engine, gt, id) { + err = __live_gpr_clear(fixme, engine, scratch); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + i915_vma_unpin_and_release(&scratch, 0); +out_close: + kernel_context_close(fixme); + return err; +} + +int intel_lrc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_lrc_layout), + SUBTEST(live_lrc_state), + SUBTEST(live_gpr_clear), + }; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 00a4f60cdfd5..6efb9221b7fa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -17,7 +17,7 @@ static int igt_global_reset(void *arg) /* Check that we can issue a global GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); reset_count = i915_reset_count(>->i915->gpu_error); @@ -28,7 +28,7 @@ static int igt_global_reset(void *arg) err = -EINVAL; } - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -45,14 +45,14 @@ static int igt_wedged_reset(void *arg) /* Check that we can recover a wedged device with a GPU reset */ igt_global_reset_lock(gt); - wakeref = intel_runtime_pm_get(>->i915->runtime_pm); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); intel_gt_set_wedged(gt); GEM_BUG_ON(!intel_gt_is_wedged(gt)); intel_gt_reset(gt, ALL_ENGINES, NULL); - intel_runtime_pm_put(>->i915->runtime_pm, wakeref); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); return intel_gt_is_wedged(gt) ? 
-EIO : 0; @@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg) /* Check that the resets are usable from atomic context */ - if (!intel_has_reset_engine(gt->i915)) + if (!intel_has_reset_engine(gt)) return 0; if (USES_GUC_SUBMISSION(gt->i915)) @@ -125,7 +125,7 @@ static int igt_atomic_engine_reset(void *arg) if (!igt_force_reset(gt)) goto out_unlock; - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { tasklet_disable_nosync(&engine->execlists.tasklet); intel_engine_pm_get(engine); @@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915) }; struct intel_gt *gt = &i915->gt; - if (!intel_has_gpu_reset(gt->i915)) + if (!intel_has_gpu_reset(gt)) return 0; if (intel_gt_is_wedged(gt)) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 321481403165..dac86f699a4c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,8 +6,9 @@ #include <linux/prime_numbers.h> -#include "gem/i915_gem_pm.h" +#include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_requests.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -34,7 +35,7 @@ static unsigned long hwsp_cacheline(struct intel_timeline *tl) #define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) struct mock_hwsp_freelist { - struct drm_i915_private *i915; + struct intel_gt *gt; struct radix_tree_root cachelines; struct intel_timeline **history; unsigned long count, max; @@ -67,7 +68,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, unsigned long cacheline; int err; - tl = intel_timeline_create(&state->i915->gt, NULL); + tl = intel_timeline_create(state->gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -105,6 +106,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, static int mock_hwsp_freelist(void *arg) { struct mock_hwsp_freelist state; + struct drm_i915_private *i915; const struct { const char *name; unsigned int flags; @@ -116,12 +118,14 @@ static int mock_hwsp_freelist(void *arg) unsigned int na; int err = 0; + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; + state.gt = &i915->gt; /* * Create a bunch of timelines and check that their HWSP do not overlap. 
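The hunks above and below repeat a single conversion pattern: live selftests now take a struct intel_gt as their argument instead of drm_i915_private, drop the struct_mutex and runtime-PM boilerplate, hold an explicit engine wakeref while issuing requests, and flush with igt_flush_test(gt->i915). A minimal sketch of that shape, assuming the helpers keep the signatures shown in this diff (live_example itself is a hypothetical name, not a function from the patch):

static int live_example(void *arg)
{
	struct intel_gt *gt = arg;		/* subtest argument is now the GT, not i915 */
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {	/* engines are iterated via the GT */
		intel_engine_pm_get(engine);	/* explicit engine wakeref replaces struct_mutex + rpm */

		/* ... create requests against engine->kernel_context and wait on them ... */

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))		/* no I915_WAIT_LOCKED flag any more */
		err = -EIO;

	return err;
}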
@@ -136,7 +140,6 @@ static int mock_hwsp_freelist(void *arg) goto err_put; } - mutex_lock(&state.i915->drm.struct_mutex); for (p = phases; p->name; p++) { pr_debug("%s(%s)\n", __func__, p->name); for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { @@ -149,10 +152,9 @@ static int mock_hwsp_freelist(void *arg) out: for (na = 0; na < state.max; na++) __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); kfree(state.history); err_put: - drm_dev_put(&state.i915->drm); + drm_dev_put(&i915->drm); return err; } @@ -449,8 +451,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - err = intel_timeline_pin(tl); if (err) { rq = ERR_PTR(err); @@ -461,10 +461,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; + i915_request_get(rq); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); - if (err) + if (err) { + i915_request_put(rq); rq = ERR_PTR(err); + } out_unpin: intel_timeline_unpin(tl); @@ -475,11 +479,11 @@ out: } static struct intel_timeline * -checked_intel_timeline_create(struct drm_i915_private *i915) +checked_intel_timeline_create(struct intel_gt *gt) { struct intel_timeline *tl; - tl = intel_timeline_create(&i915->gt, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return tl; @@ -496,11 +500,10 @@ checked_intel_timeline_create(struct drm_i915_private *i915) static int live_hwsp_engine(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -515,37 +518,40 @@ static int live_hwsp_engine(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + for (n = 0; n < NUM_TIMELINES; n++) { struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } timelines[count++] = tl; + i915_request_put(rq); } + + intel_engine_pm_put(engine); + if (err) + break; } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -559,11 +565,7 @@ out: intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -571,11 +573,10 @@ out: static int live_hwsp_alternate(void *arg) { #define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -591,25 +592,25 @@ static int live_hwsp_alternate(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for (n = 0; n < NUM_TIMELINES; 
n++) { - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_timeline *tl; struct i915_request *rq; if (!intel_engine_can_store_dword(engine)) continue; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { + intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } + intel_engine_pm_get(engine); rq = tl_write(tl, engine, count); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); @@ -617,11 +618,12 @@ static int live_hwsp_alternate(void *arg) } timelines[count++] = tl; + i915_request_put(rq); } } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -635,22 +637,17 @@ out: intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } static int live_hwsp_wrap(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = 0; /* @@ -658,14 +655,10 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + tl = intel_timeline_create(gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); - tl = intel_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; @@ -673,7 +666,7 @@ static int live_hwsp_wrap(void *arg) if (err) goto out_free; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -681,7 +674,9 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; @@ -743,29 +738,24 @@ static int live_hwsp_wrap(void *arg) goto out; } - i915_retire_requests(i915); /* recycle HWSP */ + intel_gt_retire_requests(gt); /* recycle HWSP */ } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; intel_timeline_unpin(tl); out_free: intel_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } static int live_hwsp_recycle(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count; int err = 0; @@ -775,38 +765,38 @@ static int live_hwsp_recycle(void *arg) * want to confuse ourselves or the GPU. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { IGT_TIMEOUT(end_time); if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + do { struct intel_timeline *tl; struct i915_request *rq; - tl = checked_intel_timeline_create(i915); + tl = checked_intel_timeline_create(gt); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Wait for timeline writes timed out!\n"); + i915_request_put(rq); intel_timeline_put(tl); err = -EIO; - goto out; + break; } if (*tl->hwsp_seqno != count) { @@ -815,17 +805,18 @@ static int live_hwsp_recycle(void *arg) err = -EINVAL; } + i915_request_put(rq); intel_timeline_put(tl); count++; if (err) - goto out; + break; } while (!__igt_timeout(end_time, NULL)); - } -out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + intel_engine_pm_put(engine); + if (err) + break; + } return err; } @@ -842,5 +833,5 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) if (intel_gt_is_wedged(&i915->gt)) return 0; - return i915_live_subtests(tests, i915); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index d06d68ac2a3b..ef02920cec29 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -33,8 +33,32 @@ struct wa_lists { } engine[I915_NUM_ENGINES]; }; +static int request_add_sync(struct i915_request *rq, int err) +{ + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -EIO; + i915_request_put(rq); + + return err; +} + +static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) +{ + int err = 0; + + i915_request_get(rq); + i915_request_add(rq); + if (spin && !igt_wait_for_spinner(spin, rq)) + err = -ETIMEDOUT; + i915_request_put(rq); + + return err; +} + static void -reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_init(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -42,10 +66,10 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) memset(lists, 0, sizeof(*lists)); wa_init_start(&lists->gt_wa_list, "GT_REF", "global"); - gt_init_workarounds(i915, &lists->gt_wa_list); + gt_init_workarounds(gt->i915, &lists->gt_wa_list); wa_init_finish(&lists->gt_wa_list); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct i915_wa_list *wal = &lists->engine[id].wa_list; wa_init_start(wal, "REF", engine->name); @@ -59,12 +83,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) } static void -reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists) +reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) { struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, i915, id) + for_each_engine(engine, gt, id) intel_wa_list_free(&lists->engine[id].wa_list); intel_wa_list_free(&lists->gt_wa_list); @@ -191,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx, err = 0; i915_gem_object_lock(results); - 
intel_wedge_on_timeout(&wedge, &ctx->i915->gt, HZ / 5) /* safety net! */ + intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */ err = i915_gem_object_set_to_cpu_domain(results, false); i915_gem_object_unlock(results); - if (intel_gt_is_wedged(&ctx->i915->gt)) + if (intel_gt_is_wedged(engine->gt)) err = -EIO; if (err) goto out_put; @@ -243,7 +267,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, struct i915_gem_context *ctx; struct intel_context *ce; struct i915_request *rq; - intel_wakeref_t wakeref; int err = 0; ctx = kernel_context(engine->i915); @@ -255,12 +278,9 @@ switch_to_scratch_context(struct intel_engine_cs *engine, ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref) - rq = igt_spinner_create_request(spin, ce, MI_NOOP); + rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - kernel_context_close(ctx); if (IS_ERR(rq)) { spin = NULL; @@ -268,17 +288,12 @@ switch_to_scratch_context(struct intel_engine_cs *engine, goto err; } - i915_request_add(rq); - - if (spin && !igt_wait_for_spinner(spin, rq)) { - pr_err("Spinner failed to start\n"); - err = -ETIMEDOUT; - } - + err = request_add_spin(rq, spin); err: if (err && spin) igt_spinner_end(spin); + kernel_context_close(ctx); return err; } @@ -313,7 +328,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, if (err) goto out_spin; - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(engine->uncore->rpm, wakeref) err = reset(engine); igt_spinner_end(&spin); @@ -355,6 +370,7 @@ out_ctx: static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_vma *vma; int err; @@ -362,7 +378,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -463,12 +481,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; + struct i915_address_space *vm; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1); + vm = i915_gem_context_get_vm_rcu(ctx); + scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); + i915_vm_put(vm); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -565,6 +586,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + err = engine->emit_bb_start(rq, batch->node.start, PAGE_SIZE, 0); @@ -572,15 +601,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, goto err_request; err_request: - i915_request_add(rq); - if (err) - goto out_batch; - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = request_add_sync(rq, err); + if (err) { pr_err("%s: Futzing %x timedout; cancelling test\n", engine->name, reg); - intel_gt_set_wedged(&ctx->i915->gt); - err = -EIO; + intel_gt_set_wedged(engine->gt); goto out_batch; } @@ -668,7 +693,7 @@ out_unpin: break; } - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + if 
(igt_flush_test(ctx->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -679,36 +704,29 @@ out_scratch: static int live_dirty_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct i915_gem_context *ctx; enum intel_engine_id id; - intel_wakeref_t wakeref; struct drm_file *file; int err = 0; /* Can the user write to the whitelisted registers? */ - if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */ + if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */ return 0; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + file = mock_file(gt->i915); + if (IS_ERR(file)) + return PTR_ERR(file); - mutex_unlock(&i915->drm.struct_mutex); - file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto out_rpm; - } - - ctx = live_context(i915, file); + ctx = live_context(gt->i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_file; } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { if (engine->whitelist.count == 0) continue; @@ -718,45 +736,43 @@ static int live_dirty_whitelist(void *arg) } out_file: - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mock_file_free(gt->i915, file); return err; } static int live_reset_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; int err = 0; /* If we reset the gpu, we should not lose the RING_NONPRIV */ + igt_global_reset_lock(gt); - if (!engine || engine->whitelist.count == 0) - return 0; - - igt_global_reset_lock(&i915->gt); + for_each_engine(engine, gt, id) { + if (engine->whitelist.count == 0) + continue; - if (intel_has_reset_engine(i915)) { - err = check_whitelist_across_reset(engine, - do_engine_reset, - "engine"); - if (err) - goto out; - } + if (intel_has_reset_engine(gt)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, + "engine"); + if (err) + goto out; + } - if (intel_has_gpu_reset(i915)) { - err = check_whitelist_across_reset(engine, - do_device_reset, - "device"); - if (err) - goto out; + if (intel_has_gpu_reset(gt)) { + err = check_whitelist_across_reset(engine, + do_device_reset, + "device"); + if (err) + goto out; + } } out: - igt_global_reset_unlock(&i915->gt); + igt_global_reset_unlock(gt); return err; } @@ -772,6 +788,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, if (IS_ERR(rq)) return PTR_ERR(rq); + i915_vma_lock(results); + err = i915_request_await_object(rq, results->obj, true); + if (err == 0) + err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(results); + if (err) + goto err_req; + srm = MI_STORE_REGISTER_MEM; if (INTEL_GEN(ctx->i915) >= 8) srm++; @@ -797,12 +821,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, intel_ring_advance(rq, cs); err_req: - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; - - return err; + return request_add_sync(rq, err); } static int scrub_whitelisted_registers(struct i915_gem_context *ctx, @@ -850,13 +869,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, goto err_request; } + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 
0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + if (err) + goto err_request; + /* Perform the writes from an unprivileged "user" batch */ err = engine->emit_bb_start(rq, batch->node.start, 0, 0); err_request: - i915_request_add(rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) - err = -EIO; + err = request_add_sync(rq, err); err_unpin: i915_gem_object_unpin_map(batch->obj); @@ -973,7 +998,7 @@ err_a: static int live_isolated_whitelist(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct { struct i915_gem_context *ctx; struct i915_vma *scratch[2]; @@ -987,40 +1012,46 @@ static int live_isolated_whitelist(void *arg) * invisible to a second context. */ - if (!intel_engines_has_context_isolation(i915)) - return 0; - - if (!i915->kernel_context->vm) + if (!intel_engines_has_context_isolation(gt->i915)) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_address_space *vm; struct i915_gem_context *c; - c = kernel_context(i915); + c = kernel_context(gt->i915); if (IS_ERR(c)) { err = PTR_ERR(c); goto err; } - client[i].scratch[0] = create_scratch(c->vm, 1024); + vm = i915_gem_context_get_vm_rcu(c); + + client[i].scratch[0] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); + i915_vm_put(vm); kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(c->vm, 1024); + client[i].scratch[1] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); + i915_vm_put(vm); kernel_context_close(c); goto err; } client[i].ctx = c; + i915_vm_put(vm); } - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { + if (!engine->kernel_context->vm) + continue; + if (!whitelist_writable_count(engine)) continue; @@ -1074,7 +1105,7 @@ err: kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; return err; @@ -1109,16 +1140,16 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, static int live_gpu_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; - if (!intel_has_gpu_reset(i915)) + if (!intel_has_gpu_reset(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1126,25 +1157,25 @@ live_gpu_reset_workarounds(void *arg) pr_info("Verifying after GPU reset...\n"); - igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) goto out; - intel_gt_reset(&i915->gt, ALL_ENGINES, "live_workarounds"); + intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); ok = verify_wa_lists(ctx, &lists, "after reset"); out: i915_gem_context_unlock_engines(ctx); kernel_context_close(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); return ok ? 
0 : -ESRCH; } @@ -1152,7 +1183,7 @@ out: static int live_engine_reset_workarounds(void *arg) { - struct drm_i915_private *i915 = arg; + struct intel_gt *gt = arg; struct i915_gem_engines_iter it; struct i915_gem_context *ctx; struct intel_context *ce; @@ -1162,17 +1193,17 @@ live_engine_reset_workarounds(void *arg) struct wa_lists lists; int ret = 0; - if (!intel_has_reset_engine(i915)) + if (!intel_has_reset_engine(gt)) return 0; - ctx = kernel_context(i915); + ctx = kernel_context(gt->i915); if (IS_ERR(ctx)) return PTR_ERR(ctx); - igt_global_reset_lock(&i915->gt); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + igt_global_reset_lock(gt); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); - reference_lists_init(i915, &lists); + reference_lists_init(gt, &lists); for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct intel_engine_cs *engine = ce->engine; @@ -1205,12 +1236,10 @@ live_engine_reset_workarounds(void *arg) goto err; } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { + ret = request_add_spin(rq, &spin); + if (ret) { pr_err("Spinner failed to start\n"); igt_spinner_fini(&spin); - ret = -ETIMEDOUT; goto err; } @@ -1227,12 +1256,12 @@ live_engine_reset_workarounds(void *arg) } err: i915_gem_context_unlock_engines(ctx); - reference_lists_fini(i915, &lists); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - igt_global_reset_unlock(&i915->gt); + reference_lists_fini(gt, &lists); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + igt_global_reset_unlock(gt); kernel_context_close(ctx); - igt_flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(gt->i915); return ret; } @@ -1246,14 +1275,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gpu_reset_workarounds), SUBTEST(live_engine_reset_workarounds), }; - int err; if (intel_gt_is_wedged(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 598170efcaf6..2a77c051f36a 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 249c747e9756..37f7bcbf7dac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -9,6 +9,27 @@ #include "intel_guc_submission.h" #include "i915_drv.h" +/** + * DOC: GuC + * + * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is + * designed to offload some of the functionality usually performed by the host + * driver; currently the main operations it can take care of are: + * + * - Authentication of the HuC, which is required to fully enable HuC usage. + * - Low latency graphics context scheduling (a.k.a. GuC submission). + * - GT Power management. + * + * The enable_guc module parameter can be used to select which of those + * operations to enable within GuC. 
Note that not all the operations are + * supported on all gen9+ platforms. + * + * Enabling the GuC is not mandatory and therefore the firmware is only loaded + * if at least one of the operations is selected. However, not loading the GuC + * might result in the loss of some features that do require the GuC (currently + * just the HuC, but more are expected to land in the future). + */ + static void gen8_guc_raise_irq(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -548,9 +569,15 @@ int intel_guc_resume(struct intel_guc *guc) } /** - * DOC: GuC Address Space + * DOC: GuC Memory Management * - * The layout of GuC address space is shown below: + * GuC can't allocate any memory for its own usage, so all the allocations must + * be handled by the host driver. GuC accesses the memory via the GGTT, with the + * exception of the top and bottom parts of the 4GB address space, which are + * instead re-mapped by the GuC HW to memory location of the FW itself (WOPCM) + * or other parts of the HW. The driver must take care not to place objects that + * the GuC is going to access in these reserved ranges. The layout of the GuC + * address space is shown below: * * :: * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 36332064de9c..2cf2d3314f62 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -607,7 +607,6 @@ out_unlock: void intel_guc_log_relay_flush(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; intel_wakeref_t wakeref; /* @@ -616,7 +615,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) */ flush_work(&log->relay.flush_work); - with_intel_runtime_pm(&i915->runtime_pm, wakeref) + with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref) guc_action_flush_log(guc); /* GuC would have updated log buffer by now, so capture it */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index edf194d23c6b..1949346e714e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -83,6 +83,9 @@ #define GEN8_GTCR _MMIO(0x4274) #define GEN8_GTCR_INVALIDATE (1<<0) +#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8) +#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0) + #define GUC_ARAT_C6DIS _MMIO(0xA178) #define GUC_SHIM_CONTROL _MMIO(0xc064) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f325d3dd564f..009e54a3764f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -29,6 +29,12 @@ enum { /** * DOC: GuC-based command submission * + * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC + * firmware is moving to an updated submission interface and we plan to + * turn submission back on when that lands. The below documentation (and related + * code) matches the old submission model and will be updated as part of the + * upgrade to the new flow. + * * GuC client: * A intel_guc_client refers to a submission path through GuC. Currently, there * is only one client, which is charged with all submissions to the GuC. 
This @@ -1014,7 +1020,7 @@ static void guc_interrupts_capture(struct intel_gt *gt) * to GuC */ irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route USER_INTERRUPT to Host, all others are sent to GuC. */ @@ -1062,7 +1068,7 @@ static void guc_interrupts_release(struct intel_gt *gt) */ irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER); irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING); - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) ENGINE_WRITE(engine, RING_MODE_GEN7, irqs); /* route all GT interrupts to the host */ @@ -1145,7 +1151,7 @@ int intel_guc_submission_enable(struct intel_guc *guc) /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(gt); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { engine->set_default_submission = guc_set_default_submission; engine->set_default_submission(engine); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d4625c97b4f9..8be515c8d0f0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -9,6 +9,34 @@ #include "intel_huc.h" #include "i915_drv.h" +/** + * DOC: HuC + * + * The HuC is a dedicated microcontroller for usage in media HEVC (High + * Efficiency Video Coding) operations. Userspace can directly use the firmware + * capabilities by adding HuC specific commands to batch buffers. + * + * The kernel driver is only responsible for loading the HuC firmware and + * triggering its security authentication, which is performed by the GuC. For + * The GuC to correctly perform the authentication, the HuC binary must be + * loaded before the GuC one. Loading the HuC is optional; however, not using + * the HuC might negatively impact power usage and/or performance of media + * workloads, depending on the use-cases. + * + * See https://github.com/intel/media-driver for the latest details on HuC + * functionality. + */ + +/** + * DOC: HuC Memory Management + * + * Similarly to the GuC, the HuC can't do any memory allocations on its own, + * with the difference being that the allocations for HuC usage are handled by + * the userspace driver instead of the kernel one. The HuC accesses the memory + * via the PPGTT belonging to the context loaded on the VCS executing the + * HuC-specific commands. + */ + void intel_huc_init_early(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_gt(huc)->i915; @@ -118,10 +146,9 @@ void intel_huc_fini(struct intel_huc *huc) * * Called after HuC and GuC firmware loading during intel_uc_init_hw(). * - * This function pins HuC firmware image object into GGTT. - * Then it invokes GuC action to authenticate passing the offset to RSA - * signature through intel_guc_auth_huc(). It then waits for 50ms for - * firmware verification ACK and unpins the object. + * This function invokes the GuC action to authenticate the HuC firmware, + * passing the offset of the RSA signature to intel_guc_auth_huc(). It then + * waits for up to 50ms for firmware verification ACK. 
*/ int intel_huc_auth(struct intel_huc *huc) { @@ -185,7 +212,7 @@ int intel_huc_check_status(struct intel_huc *huc) if (!intel_huc_is_supported(huc)) return -ENODEV; - with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + with_intel_runtime_pm(gt->uncore->rpm, wakeref) status = intel_uncore_read(gt->uncore, huc->status.reg); return (status & huc->status.mask) == huc->status.value; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c index 74602487ed67..d654340d4d03 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c @@ -8,21 +8,6 @@ #include "i915_drv.h" /** - * DOC: HuC Firmware - * - * Motivation: - * GEN9 introduces a new dedicated firmware for usage in media HEVC (High - * Efficiency Video Coding) operations. Userspace can use the firmware - * capabilities by adding HuC specific commands to batch buffers. - * - * Implementation: - * The same firmware loader is used as the GuC. However, the actual - * loading to HW is deferred until GEM initialization is done. - * - * Note that HuC firmware loading must be done before GuC loading. - */ - -/** * intel_huc_fw_init_early() - initializes HuC firmware struct * @huc: intel_huc struct * diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 71ee7ab035cc..3fdbc935d155 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -224,17 +224,7 @@ static int guc_enable_communication(struct intel_guc *guc) return 0; } -static void guc_stop_communication(struct intel_guc *guc) -{ - intel_guc_ct_stop(&guc->ct); - - guc->send = intel_guc_send_nop; - guc->handler = intel_guc_to_host_event_handler_nop; - - guc_clear_mmio_msg(guc); -} - -static void guc_disable_communication(struct intel_guc *guc) +static void __guc_stop_communication(struct intel_guc *guc) { /* * Events generated during or after CT disable are logged by guc in @@ -247,6 +237,20 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; guc->handler = intel_guc_to_host_event_handler_nop; +} + +static void guc_stop_communication(struct intel_guc *guc) +{ + intel_guc_ct_stop(&guc->ct); + + __guc_stop_communication(guc); + + DRM_INFO("GuC communication stopped\n"); +} + +static void guc_disable_communication(struct intel_guc *guc) +{ + __guc_stop_communication(guc); intel_guc_ct_disable(&guc->ct); @@ -537,7 +541,9 @@ void intel_uc_fini_hw(struct intel_uc *uc) if (intel_uc_supports_guc_submission(uc)) intel_guc_submission_disable(guc); - guc_disable_communication(guc); + if (guc_communication_enabled(guc)) + guc_disable_communication(guc); + __uc_sanitize(uc); } @@ -581,7 +587,7 @@ void intel_uc_suspend(struct intel_uc *uc) if (!intel_guc_is_running(guc)) return; - with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) + with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref) intel_uc_runtime_suspend(uc); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index a34e95a07b1d..bb4889d2346d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -39,25 +39,27 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * Must be ordered based on platform + revid, from newer to older. 
*/ #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ - fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \ - fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \ - fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \ - fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \ - fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398)) - -#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \ + fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \ + fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \ + fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \ + fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \ + fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \ + fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \ + fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0)) + +#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \ "i915/" \ __stringify(prefix_) name_ \ - __stringify(major_) separator_ \ - __stringify(minor_) separator_ \ + __stringify(major_) "." \ + __stringify(minor_) "." \ __stringify(patch_) ".bin" #define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \ - __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_) + __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_) #define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \ - __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_) + __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_) /* All blobs need to be declared via MODULE_FIRMWARE() */ #define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \ @@ -337,25 +339,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915) } /* Get version numbers from the CSS header */ - switch (uc_fw->type) { - case INTEL_UC_FW_TYPE_GUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR, - css->sw_version); - break; - - case INTEL_UC_FW_TYPE_HUC: - uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR, - css->sw_version); - uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR, - css->sw_version); - break; - - default: - MISSING_CASE(uc_fw->type); - break; - } + uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, + css->sw_version); + uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR, + css->sw_version); if (uc_fw->major_ver_found != uc_fw->major_ver_wanted || uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h index ae58e8a8c53b..029214cdedd5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h @@ -39,9 +39,6 @@ * 3. Length info of each component can be found in header, in dwords. * 4. Modulus and exponent key are not required by driver. They may not appear * in fw. So driver will load a truncated firmware in this case. - * - * The only difference between GuC and HuC firmwares is how the version - * information is saved. 
*/ struct uc_css_header { @@ -69,11 +66,9 @@ struct uc_css_header { char username[8]; char buildnumber[12]; u32 sw_version; -#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16) -#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8) -#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0) -#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16) -#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0) +#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16) +#define CSS_SW_VERSION_UC_MINOR (0xFF << 8) +#define CSS_SW_VERSION_UC_PATCH (0xFF << 0) u32 reserved[14]; u32 header_info; } __packed; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bba0eafe1cdb..d8a80388bd31 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -108,23 +108,15 @@ static bool client_doorbell_in_sync(struct intel_guc_client *client) * validating that the doorbells status expected by the driver matches what the * GuC/HW have. */ -static int igt_guc_clients(void *args) +static int igt_guc_clients(void *arg) { - struct drm_i915_private *dev_priv = args; + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; intel_wakeref_t wakeref; - struct intel_guc *guc; int err = 0; - GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->gt.uc.guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); err = check_all_doorbells(guc); if (err) @@ -189,8 +181,7 @@ out: guc_clients_create(guc); guc_clients_enable(guc); unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } @@ -201,22 +192,14 @@ unlock: */ static int igt_guc_doorbells(void *arg) { - struct drm_i915_private *dev_priv = arg; + struct intel_gt *gt = arg; + struct intel_guc *guc = >->uc.guc; intel_wakeref_t wakeref; - struct intel_guc *guc; int i, err = 0; u16 db_id; - GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - - guc = &dev_priv->gt.uc.guc; - if (!guc) { - pr_err("No guc object!\n"); - err = -EINVAL; - goto unlock; - } + GEM_BUG_ON(!HAS_GT_UC(gt->i915)); + wakeref = intel_runtime_pm_get(gt->uncore->rpm); err = check_all_doorbells(guc); if (err) @@ -298,20 +281,19 @@ out: guc_client_free(clients[i]); } unlock: - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); return err; } -int intel_guc_live_selftest(struct drm_i915_private *dev_priv) +int intel_guc_live_selftest(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_guc_clients), SUBTEST(igt_guc_doorbells), }; - if (!USES_GUC_SUBMISSION(dev_priv)) + if (!USES_GUC_SUBMISSION(i915)) return 0; - return i915_subtests(tests, dev_priv); + return intel_gt_live_subtests(tests, &i915->gt); } diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 5ff2437b2998..771420453f82 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) flags = PIN_MAPPABLE; } - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); mmio_hw_access_pre(dev_priv); 
ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); mmio_hw_access_post(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); if (ret) gvt_err("fail to alloc %s gm space from host\n", high_gm ? "high" : "low"); @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu) return 0; out_free_aperture: - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); return ret; } @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); drm_mm_remove_node(&vgpu->gm.high_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); } /** @@ -198,7 +198,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) mutex_lock(&dev_priv->ggtt.vm.mutex); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { - reg = i915_reserve_fence(dev_priv); + reg = i915_reserve_fence(&dev_priv->ggtt); if (IS_ERR(reg)) goto out_free_fence; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f21b8fb5b37e..d6e7a1189bad 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -534,7 +534,7 @@ static void clean_execlist(struct intel_vgpu *vgpu, struct intel_vgpu_submission *s = &vgpu->submission; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { kfree(s->ring_scan_buffer[engine->id]); s->ring_scan_buffer[engine->id] = NULL; s->ring_scan_buffer_size[engine->id] = 0; @@ -548,7 +548,7 @@ static void reset_execlist(struct intel_vgpu *vgpu, struct intel_engine_cs *engine; intel_engine_mask_t tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); } diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 25f78196b964..45a9124e53b6 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2796,7 +2796,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); + MMIO_D(_MMIO(_SRD_CTL_EDP), D_BDW); MMIO_D(_MMIO(0x6671c), D_BDW_PLUS); MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 343d79c1cb7e..04a5a0d90823 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "\n"); } -static ssize_t -hw_id_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mdev_device *mdev = mdev_from_dev(dev); - - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%u\n", - vgpu->submission.shadow[0]->gem_context->hw_id); - } - return sprintf(buf, "\n"); -} - static DEVICE_ATTR_RO(vgpu_id); -static DEVICE_ATTR_RO(hw_id); static struct attribute *intel_vgpu_attrs[] = { &dev_attr_vgpu_id.attr, - 
&dev_attr_hw_id.attr, NULL }; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6c79d16b381e..36bb7639e82f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -365,7 +365,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); + struct i915_ppgtt *ppgtt = + i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -378,6 +379,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } + + i915_vm_put(&ppgtt->vm); } static int @@ -385,11 +388,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) return 0; @@ -415,10 +415,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&vgpu->vgpu_lock); if (workload->shadow) return 0; @@ -580,8 +579,6 @@ static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_shadow_bb *bb, *pos; if (list_empty(&workload->shadow_bb)) @@ -590,8 +587,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb = list_first_entry(&workload->shadow_bb, struct intel_vgpu_shadow_bb, list); - mutex_lock(&dev_priv->drm.struct_mutex); - list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { if (bb->accessing) @@ -609,8 +604,6 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) list_del(&bb->list); kfree(bb); } - - mutex_unlock(&dev_priv->drm.struct_mutex); } static int prepare_workload(struct intel_vgpu_workload *workload) @@ -685,7 +678,6 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -694,7 +686,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ring_id, workload); mutex_lock(&vgpu->vgpu_lock); - mutex_lock(&dev_priv->drm.struct_mutex); ret = intel_gvt_workload_req_alloc(workload); if (ret) @@ -729,7 +720,6 @@ out: err_req: if (ret) workload->status = ret; - mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -887,7 +877,7 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, intel_engine_mask_t tmp; /* free the unsubmited workloads in the queues. 
*/ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); @@ -1233,20 +1223,18 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret; - mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx); i915_gem_context_set_force_single_submission(ctx); - i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm)); + ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + i915_context_ppgtt_root_save(s, ppgtt); for_each_engine(engine, i915, i) { struct intel_context *ce; @@ -1291,12 +1279,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); - mutex_unlock(&i915->drm.struct_mutex); return 0; out_shadow_ctx: - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm)); + i915_context_ppgtt_root_restore(s, ppgtt); for_each_engine(engine, i915, i) { if (IS_ERR(s->shadow[i])) break; @@ -1304,9 +1292,8 @@ out_shadow_ctx: intel_context_unpin(s->shadow[i]); intel_context_put(s->shadow[i]); } + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1597,9 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, */ if (list_empty(workload_q_head(vgpu, ring_id))) { intel_runtime_pm_get(&dev_priv->runtime_pm); - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&vgpu->vgpu_lock); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&vgpu->vgpu_lock); intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 48e16ad93bbd..7927b1a0c7a6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -12,8 +12,6 @@ #include "i915_active.h" #include "i915_globals.h" -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) - /* * Active refs memory management * @@ -27,35 +25,35 @@ static struct i915_global_active { } global; struct active_node { - struct i915_active_request base; + struct i915_active_fence base; struct i915_active *ref; struct rb_node node; u64 timeline; }; static inline struct active_node * -node_from_active(struct i915_active_request *active) +node_from_active(struct i915_active_fence *active) { return container_of(active, struct active_node, base); } #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) -static inline bool is_barrier(const struct i915_active_request *active) +static inline bool is_barrier(const struct i915_active_fence *active) { - return IS_ERR(rcu_access_pointer(active->request)); + return IS_ERR(rcu_access_pointer(active->fence)); } static inline struct llist_node *barrier_to_ll(struct active_node *node) { GEM_BUG_ON(!is_barrier(&node->base)); - return (struct llist_node *)&node->base.link; + return (struct llist_node *)&node->base.cb.node; } static inline struct intel_engine_cs * __barrier_to_engine(struct active_node *node) { 
- return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev); + return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); } static inline struct intel_engine_cs * @@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node) static inline struct active_node *barrier_from_ll(struct llist_node *x) { return container_of((struct list_head *)x, - struct active_node, base.link); + struct active_node, base.cb.node); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) @@ -92,12 +90,16 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - debug_object_activate(ref, &active_debug_desc); + lockdep_assert_held(&ref->mutex); + if (!atomic_read(&ref->count)) /* before the first inc */ + debug_object_activate(ref, &active_debug_desc); } static void debug_active_deactivate(struct i915_active *ref) { - debug_object_deactivate(ref, &active_debug_desc); + lockdep_assert_held(&ref->mutex); + if (!atomic_read(&ref->count)) /* after the last dec */ + debug_object_deactivate(ref, &active_debug_desc); } static void debug_active_fini(struct i915_active *ref) @@ -128,6 +130,7 @@ __active_retire(struct i915_active *ref) bool retire = false; lockdep_assert_held(&ref->mutex); + GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ if (atomic_dec_and_test(&ref->count)) { @@ -142,14 +145,31 @@ __active_retire(struct i915_active *ref) if (!retire) return; + GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_request_isset(&it->base)); + GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } /* After the final retire, the entire struct may be freed */ if (ref->retire) ref->retire(ref); + + /* ... except if you wait on it, you must manage your own references! 
*/ + wake_up_var(ref); +} + +static void +active_work(struct work_struct *wrk) +{ + struct i915_active *ref = container_of(wrk, typeof(*ref), work); + + GEM_BUG_ON(!atomic_read(&ref->count)); + if (atomic_add_unless(&ref->count, -1, 1)) + return; + + mutex_lock(&ref->mutex); + __active_retire(ref); } static void @@ -159,18 +179,31 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* One active may be flushed from inside the acquire of another */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); + /* If we are inside interrupt context (fence signaling), defer */ + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || + !mutex_trylock(&ref->mutex)) { + queue_work(system_unbound_wq, &ref->work); + return; + } + __active_retire(ref); } static void -node_retire(struct i915_active_request *base, struct i915_request *rq) +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - active_retire(node_from_active(base)->ref); + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct active_node, base.cb)->ref); } -static struct i915_active_request * +static void +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct i915_active, excl.cb)); +} + +static struct i915_active_fence * active_instance(struct i915_active *ref, struct intel_timeline *tl) { struct active_node *node, *prealloc; @@ -214,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) } node = prealloc; - i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire); + __i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire); node->ref = ref; node->timeline = idx; @@ -229,23 +262,28 @@ out: return &node->base; } -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key) { + unsigned long bits; + debug_active_init(ref); - ref->i915 = i915; ref->flags = 0; ref->active = active; - ref->retire = retire; + ref->retire = ptr_unpack_bits(retire, &bits, 2); + if (bits & I915_ACTIVE_MAY_SLEEP) + ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + ref->tree = RB_ROOT; ref->cache = NULL; init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); + __i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire); + INIT_WORK(&ref->work, active_work); } static bool ____active_del_barrier(struct i915_active *ref, @@ -298,9 +336,9 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq) + struct dma_fence *fence) { - struct i915_active_request *active; + struct i915_active_fence *active; int err; lockdep_assert_held(&tl->mutex); @@ -323,26 +361,44 @@ int i915_active_ref(struct i915_active *ref, * request that we want to emit on the kernel_context. 
*/ __active_del_barrier(ref, node_from_active(active)); - RCU_INIT_POINTER(active->request, NULL); - INIT_LIST_HEAD(&active->link); - } else { - if (!i915_active_request_isset(active)) - atomic_inc(&ref->count); + RCU_INIT_POINTER(active->fence, NULL); + atomic_dec(&ref->count); } - GEM_BUG_ON(!atomic_read(&ref->count)); - __i915_active_request_set(active, rq); + if (!__i915_active_fence_set(active, fence)) + atomic_inc(&ref->count); out: i915_active_release(ref); return err; } +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +{ + /* We expect the caller to manage the exclusive timeline ordering */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + /* + * As we don't know which mutex the caller is using, we told a small + * lie to the debug code that it is using the i915_active.mutex; + * and now we must stick to that lie. + */ + mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_); + if (!__i915_active_fence_set(&ref->excl, f)) + atomic_inc(&ref->count); + mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_); +} + +bool i915_active_acquire_if_busy(struct i915_active *ref) +{ + debug_active_assert(ref); + return atomic_add_unless(&ref->count, 1, 0); +} + int i915_active_acquire(struct i915_active *ref) { int err; - debug_active_assert(ref); - if (atomic_add_unless(&ref->count, 1, 0)) + if (i915_active_acquire_if_busy(ref)) return 0; err = mutex_lock_interruptible(&ref->mutex); @@ -367,109 +423,66 @@ void i915_active_release(struct i915_active *ref) active_retire(ref); } -static void __active_ungrab(struct i915_active *ref) +static void enable_signaling(struct i915_active_fence *active) { - clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags); -} + struct dma_fence *fence; -bool i915_active_trygrab(struct i915_active *ref) -{ - debug_active_assert(ref); - - if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)) - return false; - - if (!atomic_add_unless(&ref->count, 1, 0)) { - __active_ungrab(ref); - return false; - } - - return true; -} - -void i915_active_ungrab(struct i915_active *ref) -{ - GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)); + fence = i915_active_fence_get(active); + if (!fence) + return; - active_retire(ref); - __active_ungrab(ref); + dma_fence_enable_sw_signaling(fence); + dma_fence_put(fence); } int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; - int err; + int err = 0; might_sleep(); - might_lock(&ref->mutex); - if (i915_active_is_idle(ref)) + if (!i915_active_acquire_if_busy(ref)) return 0; - err = mutex_lock_interruptible(&ref->mutex); - if (err) - return err; - - if (!atomic_add_unless(&ref->count, 1, 0)) { - mutex_unlock(&ref->mutex); - return 0; - } - + /* Flush lazy signals */ + enable_signaling(&ref->excl); rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - if (is_barrier(&it->base)) { /* unconnected idle-barrier */ - err = -EBUSY; - break; - } + if (is_barrier(&it->base)) /* unconnected idle barrier */ + continue; - err = i915_active_request_retire(&it->base, BKL(ref)); - if (err) - break; + enable_signaling(&it->base); } + /* Any fence added after the wait begins will not be auto-signaled */ - __active_retire(ref); + i915_active_release(ref); if (err) return err; - if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) + if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) return -EINTR; - if (!i915_active_is_idle(ref)) - return -EBUSY; - return 0; } -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active) -{ - struct 
i915_request *barrier = - i915_active_request_raw(active, &rq->i915->drm.struct_mutex); - - return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; -} - int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { - struct active_node *it, *n; - int err; + int err = 0; - if (RB_EMPTY_ROOT(&ref->tree)) - return 0; - - /* await allocates and so we need to avoid hitting the shrinker */ - err = i915_active_acquire(ref); - if (err) - return err; + if (rcu_access_pointer(ref->excl.fence)) { + struct dma_fence *fence; - mutex_lock(&ref->mutex); - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - err = i915_request_await_active_request(rq, &it->base); - if (err) - break; + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&ref->excl.fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } } - mutex_unlock(&ref->mutex); - i915_active_release(ref); + /* In the future we may choose to await on all fences */ + return err; } @@ -477,15 +490,16 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(atomic_read(&ref->count)); + GEM_BUG_ON(work_pending(&ref->work)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); } #endif static inline bool is_idle_barrier(struct active_node *node, u64 idx) { - return node->timeline == idx && !i915_active_request_isset(&node->base); + return node->timeline == idx && !i915_active_fence_isset(&node->base); } static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) @@ -576,8 +590,8 @@ match: int i915_active_acquire_preallocate_barrier(struct i915_active *ref, struct intel_engine_cs *engine) { - struct drm_i915_private *i915 = engine->i915; intel_engine_mask_t tmp, mask = engine->mask; + struct intel_gt *gt = engine->gt; struct llist_node *pos, *next; int err; @@ -589,7 +603,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, * We can then use the preallocated nodes in * i915_active_acquire_barrier() */ - for_each_engine_masked(engine, i915, mask, tmp) { + for_each_engine_masked(engine, gt, mask, tmp) { u64 idx = engine->kernel_context->timeline->fence_context; struct active_node *node; @@ -605,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node->base.lock = &engine->kernel_context->timeline->mutex; #endif - RCU_INIT_POINTER(node->base.request, NULL); - node->base.retire = node_retire; + RCU_INIT_POINTER(node->base.fence, NULL); + node->base.cb.func = node_retire; node->timeline = idx; node->ref = ref; } - if (!i915_active_request_isset(&node->base)) { + if (!i915_active_fence_isset(&node->base)) { /* * Mark this as being *our* unconnected proto-node. * @@ -621,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, * and then we can use the rb_node and list pointers * for our tracking of the pending barrier. 
*/ - RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); - node->base.link.prev = (void *)engine; + RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); + node->base.cb.node.prev = (void *)engine; atomic_inc(&ref->count); } @@ -679,6 +693,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } @@ -689,44 +704,113 @@ void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; + unsigned long flags; GEM_BUG_ON(intel_engine_is_virtual(engine)); - GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline); + GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); + node = llist_del_all(&engine->barrier_tasks); + if (!node) + return; /* * Attach the list of proto-fences to the in-flight request such * that the parent i915_active will be released when this request * is retired. */ - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); + spin_lock_irqsave(&rq->lock, flags); + llist_for_each_safe(node, next, node) { + RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence); smp_wmb(); /* serialise with reuse_idle_barrier */ - list_add_tail((struct list_head *)node, &rq->active_list); + list_add_tail((struct list_head *)node, &rq->fence.cb_list); } + spin_unlock_irqrestore(&rq->lock, flags); } -int i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq) +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#define active_is_held(active) lockdep_is_held((active)->lock) +#else +#define active_is_held(active) true +#endif + +/* + * __i915_active_fence_set: Update the last active fence along its timeline + * @active: the active tracker + * @fence: the new fence (under construction) + * + * Records the new @fence as the last active fence along its timeline in + * this active tracker, moving the tracking callbacks from the previous + * fence onto this one. Returns the previous fence (if not already completed), + * which the caller must ensure is executed before the new fence. To ensure + * that the order of fences within the timeline of the i915_active_fence is + * maintained, it must be locked by the caller. + */ +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence) { - int err; + struct dma_fence *prev; + unsigned long flags; + + /* NB: must be serialised by an outer timeline mutex (active->lock) */ + spin_lock_irqsave(fence->lock, flags); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + + prev = rcu_dereference_protected(active->fence, active_is_held(active)); + if (prev) { + GEM_BUG_ON(prev == fence); + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + __list_del_entry(&active->cb.node); + spin_unlock(prev->lock); /* serialise with prev->cb_list */ + + /* + * active->fence is reset by the callback from inside + * interrupt context. We need to serialise our list + * manipulation with the fence->lock to prevent the prev + * being lost inside an interrupt (it can't be replaced as + * no other caller is allowed to enter __i915_active_fence_set + * as we hold the timeline lock). 
After serialising with + * the callback, we need to double check which ran first, + * our list_del() [decoupling prev from the callback] or + * the callback... + */ + prev = rcu_access_pointer(active->fence); + } + + rcu_assign_pointer(active->fence, fence); + list_add_tail(&active->cb.node, &fence->cb_list); + + spin_unlock_irqrestore(fence->lock, flags); + + return prev; +} + +int i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq) +{ + struct dma_fence *fence; + int err = 0; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) lockdep_assert_held(active->lock); #endif - /* Must maintain ordering wrt previous active requests */ - err = i915_request_await_active_request(rq, active); - if (err) - return err; + /* Must maintain timeline ordering wrt previous active requests */ + rcu_read_lock(); + fence = __i915_active_fence_set(active, &rq->fence); + if (fence) /* but the previous fence may not belong to that timeline! */ + fence = dma_fence_get_rcu(fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } - __i915_active_request_set(active, rq); - return 0; + return err; } -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request) +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) { - /* Space left intentionally blank */ + i915_active_fence_cb(fence, cb); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index f95058f99057..4f52fe6146d2 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -12,6 +12,10 @@ #include "i915_active_types.h" #include "i915_request.h" +struct i915_request; +struct intel_engine_cs; +struct intel_timeline; + /* * We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. @@ -28,308 +32,108 @@ * write access so that we can perform concurrent read operations between * the CPU and GPU engines, as well as waiting for all rendering to * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_active_request to track the most recent (in + * object then embeds a #i915_active_fence to track the most recent (in * retirement order) request relevant for the desired mode of access. - * The #i915_active_request is updated with i915_active_request_set() to + * The #i915_active_fence is updated with i915_active_fence_set() to * track the most recent fence request, typically this is done as part of * i915_vma_move_to_active(). * - * When the #i915_active_request completes (is retired), it will + * When the #i915_active_fence completes (is retired), it will * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_active_request.request == NULL). The owner + * itself as idle (i915_active_fence.request == NULL). The owner * can then perform any action, such as delayed freeing of an active * resource including itself. 
*/ -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request); +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * i915_active_request_init - prepares the activity tracker for use + * __i915_active_fence_init - prepares the activity tracker for use * @active - the active tracker - * @rq - initial request to track, can be NULL + * @fence - initial fence to track, can be NULL * @func - a callback when then the tracker is retired (becomes idle), * can be NULL * - * i915_active_request_init() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. When the last request becomes idle, when it is retired + * i915_active_fence_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active fence + * associated with it. When the last fence becomes idle, when it is retired * after completion, the optional callback @func is invoked. */ static inline void -i915_active_request_init(struct i915_active_request *active, +__i915_active_fence_init(struct i915_active_fence *active, struct mutex *lock, - struct i915_request *rq, - i915_active_retire_fn retire) + void *fence, + dma_fence_func_t fn) { - RCU_INIT_POINTER(active->request, rq); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_active_retire_noop; + RCU_INIT_POINTER(active->fence, fence); + active->cb.func = fn ?: i915_active_noop; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) active->lock = lock; #endif } -#define INIT_ACTIVE_REQUEST(name, lock) \ - i915_active_request_init((name), (lock), NULL, NULL) - -/** - * i915_active_request_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * __i915_active_request_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. - */ -static inline void -__i915_active_request_set(struct i915_active_request *active, - struct i915_request *request) -{ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - lockdep_assert_held(active->lock); -#endif - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} - -int __must_check -i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq); - -/** - * i915_active_request_raw - return the active request - * @active - the active tracker - * - * i915_active_request_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_raw(const struct i915_active_request *active, - struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_active_request_peek - report the active request being monitored - * @active - the active tracker - * - * i915_active_request_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. 
- */ -static inline struct i915_request * -i915_active_request_peek(const struct i915_active_request *active, - struct mutex *mutex) -{ - struct i915_request *request; - - request = i915_active_request_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; +#define INIT_ACTIVE_FENCE(A, LOCK) \ + __i915_active_fence_init((A), (LOCK), NULL, NULL) - return request; -} - -/** - * i915_active_request_get - return a reference to the active request - * @active - the active tracker - * - * i915_active_request_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_get(const struct i915_active_request *active, - struct mutex *mutex) -{ - return i915_request_get(i915_active_request_peek(active, mutex)); -} +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence); /** - * __i915_active_request_get_rcu - return a reference to the active request + * i915_active_fence_set - updates the tracker to watch the current fence * @active - the active tracker + * @rq - the request to watch * - * __i915_active_request_get() returns a reference to the active request, - * or NULL if the active tracker is idle. The caller must hold the RCU read - * lock, but the returned pointer is safe to use outside of RCU. + * i915_active_fence_set() watches the given @rq for completion. While + * that @rq is busy, the @active reports busy. When that @rq is signaled + * (or else retired) the @active tracker is updated to report idle. */ -static inline struct i915_request * -__i915_active_request_get_rcu(const struct i915_active_request *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. 
- * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). - */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). - */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - +int __must_check +i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq); /** - * i915_active_request_get_unlocked - return a reference to the active request + * i915_active_fence_get - return a reference to the active fence * @active - the active tracker * - * i915_active_request_get_unlocked() returns a reference to the active request, + * i915_active_fence_get() returns a reference to the active fence, * or NULL if the active tracker is idle. The reference is obtained under RCU, * so no locking is required by the caller. * - * The reference should be freed with i915_request_put(). + * The reference should be freed with dma_fence_put(). 
*/ -static inline struct i915_request * -i915_active_request_get_unlocked(const struct i915_active_request *active) +static inline struct dma_fence * +i915_active_fence_get(struct i915_active_fence *active) { - struct i915_request *request; + struct dma_fence *fence; rcu_read_lock(); - request = __i915_active_request_get_rcu(active); + fence = dma_fence_get_rcu_safe(&active->fence); rcu_read_unlock(); - return request; + return fence; } /** - * i915_active_request_isset - report whether the active tracker is assigned + * i915_active_fence_isset - report whether the active tracker is assigned * @active - the active tracker * - * i915_active_request_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle + * i915_active_fence_isset() returns true if the active tracker is currently + * assigned to a fence. Due to the lazy retiring, that fence may be idle * and this may report stale information. */ static inline bool -i915_active_request_isset(const struct i915_active_request *active) +i915_active_fence_isset(const struct i915_active_fence *active) { - return rcu_access_pointer(active->request); + return rcu_access_pointer(active->fence); } -/** - * i915_active_request_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_active_request_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_active_request_retire(struct i915_active_request *active, - struct mutex *mutex) +static inline void +i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct i915_request *request; - long ret; - - request = i915_active_request_raw(active, mutex); - if (!request) - return 0; + struct i915_active_fence *active = + container_of(cb, typeof(*active), cb); - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; - - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; + RCU_INIT_POINTER(active->fence, NULL); } /* @@ -358,34 +162,40 @@ i915_active_request_retire(struct i915_active_request *active, * synchronisation. 
*/ -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key); -#define i915_active_init(i915, ref, active, retire) do { \ +#define i915_active_init(ref, active, retire) do { \ static struct lock_class_key __key; \ \ - __i915_active_init(i915, ref, active, retire, &__key); \ + __i915_active_init(ref, active, retire, &__key); \ } while (0) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq); + struct dma_fence *fence); + +static inline int +i915_active_add_request(struct i915_active *ref, struct i915_request *rq) +{ + return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); +} + +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); + +static inline bool i915_active_has_exclusive(struct i915_active *ref) +{ + return rcu_access_pointer(ref->excl.fence); +} int i915_active_wait(struct i915_active *ref); -int i915_request_await_active(struct i915_request *rq, - struct i915_active *ref); -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active); +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); int i915_active_acquire(struct i915_active *ref); +bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); -void __i915_active_release_nested(struct i915_active *ref, int subclass); - -bool i915_active_trygrab(struct i915_active *ref); -void i915_active_ungrab(struct i915_active *ref); static inline bool i915_active_is_idle(const struct i915_active *ref) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 1854e7d168c1..d89a74c142c6 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -8,22 +8,18 @@ #define _I915_ACTIVE_TYPES_H_ #include <linux/atomic.h> +#include <linux/dma-fence.h> #include <linux/llist.h> #include <linux/mutex.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> +#include <linux/workqueue.h> -struct drm_i915_private; -struct i915_active_request; -struct i915_request; +#include "i915_utils.h" -typedef void (*i915_active_retire_fn)(struct i915_active_request *, - struct i915_request *); - -struct i915_active_request { - struct i915_request __rcu *request; - struct list_head link; - i915_active_retire_fn retire; +struct i915_active_fence { + struct dma_fence __rcu *fence; + struct dma_fence_cb cb; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) /* * Incorporeal! 
@@ -43,20 +39,29 @@ struct i915_active_request { struct active_node; +#define I915_ACTIVE_MAY_SLEEP BIT(0) + +#define __i915_active_call __aligned(4) +#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) + struct i915_active { - struct drm_i915_private *i915; + atomic_t count; + struct mutex mutex; struct active_node *cache; struct rb_root tree; - struct mutex mutex; - atomic_t count; + + /* Preallocated "exclusive" node */ + struct i915_active_fence excl; unsigned long flags; -#define I915_ACTIVE_GRAB_BIT 0 +#define I915_ACTIVE_RETIRE_SLEEPS BIT(0) int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); + struct work_struct work; + struct llist_head preallocated_barriers; }; diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index fe1871d7c126..e9d4200ce3bc 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -38,6 +38,7 @@ int __init i915_global_buddy_init(void) if (!global.slab_blocks) return -ENOMEM; + i915_global_register(&global.base); return 0; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b0f51591f2e4..ada57eee914a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -41,7 +41,9 @@ #include "gem/i915_gem_context.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" +#include "gt/intel_rc6.h" #include "gt/uc/intel_guc_submission.h" #include "i915_debugfs.h" @@ -61,11 +63,18 @@ static int i915_capabilities(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_device_info *info = INTEL_INFO(dev_priv); struct drm_printer p = drm_seq_file_printer(m); + const char *msg; seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + msg = "n/a"; +#ifdef CONFIG_INTEL_IOMMU + msg = enableddisabled(intel_iommu_gfx_mapped); +#endif + seq_printf(m, "iommu: %s\n", msg); + intel_device_info_dump_flags(info, &p); intel_device_info_dump_runtime(RUNTIME_INFO(dev_priv), &p); intel_driver_caps_print(&dev_priv->caps, &p); @@ -77,11 +86,6 @@ static int i915_capabilities(struct seq_file *m, void *data) return 0; } -static char get_pin_flag(struct drm_i915_gem_object *obj) -{ - return obj->pin_global ? 
'p' : ' '; -} - static char get_tiling_flag(struct drm_i915_gem_object *obj) { switch (i915_gem_object_get_tiling(obj)) { @@ -140,9 +144,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct i915_vma *vma; int pin_count = 0; - seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s", + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, - get_pin_flag(obj), get_tiling_flag(obj), get_global_flag(obj), get_pin_mapped_flag(obj), @@ -221,8 +224,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (pinned x %d)", pin_count); if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); - if (obj->pin_global) - seq_printf(m, " (global)"); + if (i915_gem_object_is_framebuffer(obj)) + seq_printf(m, " (fb)"); engine = i915_gem_object_last_write_engine(obj); if (engine) @@ -243,6 +246,9 @@ static int per_file_stats(int id, void *ptr, void *data) struct file_stats *stats = data; struct i915_vma *vma; + if (!kref_get_unless_zero(&obj->base.refcount)) + return 0; + stats->count++; stats->total += obj->base.size; if (!atomic_read(&obj->bind_count)) @@ -290,6 +296,7 @@ static int per_file_stats(int id, void *ptr, void *data) } spin_unlock(&obj->vma.lock); + i915_gem_object_put(obj); return 0; } @@ -309,34 +316,44 @@ static void print_context_stats(struct seq_file *m, struct drm_i915_private *i915) { struct file_stats kstats = {}; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &i915->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { intel_context_lock_pinned(ce); if (intel_context_is_pinned(ce)) { + rcu_read_lock(); if (ce->state) per_file_stats(0, ce->state->obj, &kstats); per_file_stats(0, ce->ring->vma->obj, &kstats); + rcu_read_unlock(); } intel_context_unlock_pinned(ce); } i915_gem_context_unlock_engines(ctx); if (!IS_ERR_OR_NULL(ctx->file_priv)) { - struct file_stats stats = { .vm = ctx->vm, }; + struct file_stats stats = { + .vm = rcu_access_pointer(ctx->vm), + }; struct drm_file *file = ctx->file_priv->file; struct task_struct *task; char name[80]; - spin_lock(&file->table_lock); + rcu_read_lock(); idr_for_each(&file->object_idr, per_file_stats, &stats); - spin_unlock(&file->table_lock); + rcu_read_unlock(); rcu_read_lock(); task = pid_task(ctx->pid ?: file->pid, PIDTYPE_PID); @@ -346,7 +363,12 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); print_file_stats(m, "[k]contexts", kstats); } @@ -354,7 +376,6 @@ static void print_context_stats(struct seq_file *m, static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); - int ret; seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n", i915->mm.shrink_count, @@ -363,12 +384,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_putc(m, '\n'); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - print_context_stats(m, i915); - mutex_unlock(&i915->drm.struct_mutex); return 0; } @@ -376,7 +392,7 @@ static int 
i915_gem_object_info(struct seq_file *m, void *data) static void gen8_display_interrupt_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - int pipe; + enum pipe pipe; for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; @@ -527,6 +543,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data) gen8_display_interrupt_info(m); } else if (IS_VALLEYVIEW(dev_priv)) { + intel_wakeref_t pref; + seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); seq_printf(m, "Display IIR:\t%08x\n", @@ -537,7 +555,6 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IMR)); for_each_pipe(dev_priv, pipe) { enum intel_display_power_domain power_domain; - intel_wakeref_t pref; power_domain = POWER_DOMAIN_PIPE(pipe); pref = intel_display_power_get_if_enabled(dev_priv, @@ -571,12 +588,14 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "PM IMR:\t\t%08x\n", I915_READ(GEN6_PMIMR)); + pref = intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT, pref); } else if (!HAS_PCH_SPLIT(dev_priv)) { seq_printf(m, "Interrupt enable: %08x\n", @@ -996,6 +1015,7 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv, struct seq_file *m, struct intel_instdone *instdone) { + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; int slice; int subslice; @@ -1011,11 +1031,11 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv, if (INTEL_GEN(dev_priv) <= 6) return; - for_each_instdone_slice_subslice(dev_priv, slice, subslice) + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, instdone->sampler[slice][subslice]); - for_each_instdone_slice_subslice(dev_priv, slice, subslice) + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, instdone->row[slice][subslice]); } @@ -1157,11 +1177,13 @@ static void print_rc6_res(struct seq_file *m, const char *title, const i915_reg_t reg) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); + intel_wakeref_t wakeref; - seq_printf(m, "%s %u (%llu us)\n", - title, I915_READ(reg), - intel_rc6_residency_us(dev_priv, reg)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + seq_printf(m, "%s %u (%llu us)\n", title, + intel_uncore_read(&i915->uncore, reg), + intel_rc6_residency_us(&i915->gt.rc6, reg)); } static int vlv_drpc_info(struct seq_file *m) @@ -1478,21 +1500,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) static int i915_opregion(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_opregion *opregion = &dev_priv->opregion; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; + struct intel_opregion *opregion = &node_to_i915(m->private)->opregion; if (opregion->header) seq_write(m, opregion->header, OPREGION_SIZE); - mutex_unlock(&dev->struct_mutex); - -out: return 0; } @@ -1512,11 +1524,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) struct drm_device *dev = 
&dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) { @@ -1551,7 +1558,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) seq_putc(m, '\n'); } mutex_unlock(&dev->mode_config.fb_lock); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -1564,23 +1570,20 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) static int i915_context_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + struct drm_i915_private *i915 = node_to_i915(m->private); + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + seq_puts(m, "HW context "); - if (!list_empty(&ctx->hw_id_link)) - seq_printf(m, "%x [pin %u]", ctx->hw_id, - atomic_read(&ctx->hw_id_pin_count)); if (ctx->pid) { struct task_struct *task; @@ -1614,9 +1617,12 @@ static int i915_context_status(struct seq_file *m, void *unused) i915_gem_context_unlock_engines(ctx); seq_putc(m, '\n'); - } - mutex_unlock(&dev->struct_mutex); + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock(&i915->gem.contexts.lock); return 0; } @@ -1654,9 +1660,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data) wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_x)); + swizzle_string(dev_priv->ggtt.bit_6_swizzle_x)); seq_printf(m, "bit6 swizzle for Y-tiling = %s\n", - swizzle_string(dev_priv->mm.bit_6_swizzle_y)); + swizzle_string(dev_priv->ggtt.bit_6_swizzle_y)); if (IS_GEN_RANGE(dev_priv, 3, 4)) { seq_printf(m, "DDC = 0x%08x\n", @@ -2133,7 +2139,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "BUF_ON", "TG_ON" }; - val = I915_READ(EDP_PSR2_STATUS); + val = I915_READ(EDP_PSR2_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR2_STATUS_STATE_MASK) >> EDP_PSR2_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2149,7 +2155,7 @@ psr_source_status(struct drm_i915_private *dev_priv, struct seq_file *m) "SRDOFFACK", "SRDENT_ON", }; - val = I915_READ(EDP_PSR_STATUS); + val = I915_READ(EDP_PSR_STATUS(dev_priv->psr.transcoder)); status_val = (val & EDP_PSR_STATUS_STATE_MASK) >> EDP_PSR_STATUS_STATE_SHIFT; if (status_val < ARRAY_SIZE(live_status)) @@ -2192,10 +2198,10 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) goto unlock; if (psr->psr2_enabled) { - val = I915_READ(EDP_PSR2_CTL); + val = I915_READ(EDP_PSR2_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR2_ENABLE; } else { - val = I915_READ(EDP_PSR_CTL); + val = I915_READ(EDP_PSR_CTL(dev_priv->psr.transcoder)); enabled = val & EDP_PSR_ENABLE; } seq_printf(m, "Source PSR ctl: %s [0x%08x]\n", @@ -2208,7 +2214,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * SKL+ Perf counter is reset 
to 0 everytime DC state is entered */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - val = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; + val = I915_READ(EDP_PSR_PERF_CNT(dev_priv->psr.transcoder)); + val &= EDP_PSR_PERF_CNT_MASK; seq_printf(m, "Performance counter: %u\n", val); } @@ -2226,8 +2233,11 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) * Reading all 3 registers before hand to minimize crossing a * frame boundary between register reads */ - for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) - su_frames_val[frame / 3] = I915_READ(PSR2_SU_STATUS(frame)); + for (frame = 0; frame < PSR2_SU_STATUS_FRAMES; frame += 3) { + val = I915_READ(PSR2_SU_STATUS(dev_priv->psr.transcoder, + frame)); + su_frames_val[frame / 3] = val; + } seq_puts(m, "Frame:\tPSR2 SU blocks:\n"); @@ -2360,8 +2370,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) for_each_power_domain(power_domain, power_well->desc->domains) seq_printf(m, " %-23s %d\n", - intel_display_power_domain_str(dev_priv, - power_domain), + intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); } @@ -2396,6 +2405,13 @@ static int i915_dmc_info(struct seq_file *m, void *unused) if (INTEL_GEN(dev_priv) >= 12) { dc5_reg = TGL_DMC_DEBUG_DC5_COUNT; dc6_reg = TGL_DMC_DEBUG_DC6_COUNT; + /* + * NOTE: DMC_DEBUG3 is a general purpose reg. + * According to B.Specs:49196 DMC f/w reuses DC5/6 counter + * reg for DC3CO debugging and validation, + * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter. + */ + seq_printf(m, "DC3CO count: %d\n", I915_READ(DMC_DEBUG3)); } else { dc5_reg = IS_BROXTON(dev_priv) ? BXT_CSR_DC3_DC5_COUNT : SKL_CSR_DC3_DC5_COUNT; @@ -3110,8 +3126,9 @@ static int i915_dp_mst_info(struct seq_file *m, void *unused) if (!intel_dig_port->dp.can_mst) continue; - seq_printf(m, "MST Source Port %c\n", - port_name(intel_dig_port->base.port)); + seq_printf(m, "MST Source Port [ENCODER:%d:%s]\n", + intel_dig_port->base.base.base.id, + intel_dig_port->base.base.name); drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr); } drm_connector_list_iter_end(&conn_iter); @@ -3573,6 +3590,37 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, i915_wedged_get, i915_wedged_set, "%llu\n"); +static int +i915_perf_noa_delay_set(void *data, u64 val) +{ + struct drm_i915_private *i915 = data; + const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + + /* + * This would lead to infinite waits as we're doing timestamp + * difference on the CS with only 32bits. 
+ */ + if (val > mul_u32_u32(U32_MAX, clk)) + return -EINVAL; + + atomic64_set(&i915->perf.noa_programming_delay, val); + return 0; +} + +static int +i915_perf_noa_delay_get(void *data, u64 *val) +{ + struct drm_i915_private *i915 = data; + + *val = atomic64_read(&i915->perf.noa_programming_delay); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops, + i915_perf_noa_delay_get, + i915_perf_noa_delay_set, + "%llu\n"); + #define DROP_UNBOUND BIT(0) #define DROP_BOUND BIT(1) #define DROP_RETIRE BIT(2) @@ -3582,6 +3630,7 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, #define DROP_IDLE BIT(6) #define DROP_RESET_ACTIVE BIT(7) #define DROP_RESET_SEQNO BIT(8) +#define DROP_RCU BIT(9) #define DROP_ALL (DROP_UNBOUND | \ DROP_BOUND | \ DROP_RETIRE | \ @@ -3590,7 +3639,8 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops, DROP_SHRINK_ALL |\ DROP_IDLE | \ DROP_RESET_ACTIVE | \ - DROP_RESET_SEQNO) + DROP_RESET_SEQNO | \ + DROP_RCU) static int i915_drop_caches_get(void *data, u64 *val) { @@ -3603,53 +3653,33 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + struct intel_gt *gt = &i915->gt; + int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); if (val & DROP_RESET_ACTIVE && - wait_for(intel_engines_are_idle(&i915->gt), - I915_IDLE_ENGINES_TIMEOUT)) - intel_gt_set_wedged(&i915->gt); + wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) + intel_gt_set_wedged(gt); - /* No need to check and wait for gpu resets, only libdrm auto-restarts - * on ioctls on -EAGAIN. */ - if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) { - int ret; + if (val & DROP_RETIRE) + intel_gt_retire_requests(gt); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (val & (DROP_IDLE | DROP_ACTIVE)) { + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; + } - /* - * To finish the flush of the idle_worker, we must complete - * the switch-to-kernel-context, which requires a double - * pass through wait_for_idle: first queues the switch, - * second waits for the switch. 
- */ - if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE)) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (ret == 0 && val & DROP_IDLE) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (val & DROP_RETIRE) - i915_retire_requests(i915); - - mutex_unlock(&i915->drm.struct_mutex); - - if (ret == 0 && val & DROP_IDLE) - ret = intel_gt_pm_wait_for_idle(&i915->gt); + if (val & DROP_IDLE) { + ret = intel_gt_pm_wait_for_idle(gt); + if (ret) + return ret; } - if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) - intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL); + if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) + intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) @@ -3662,10 +3692,8 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); - if (val & DROP_IDLE) { - flush_delayed_work(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); - } + if (val & DROP_RCU) + rcu_barrier(); if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); @@ -3721,6 +3749,15 @@ i915_cache_sharing_set(void *data, u64 val) return 0; } +static void +intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice, + u8 *to_mask) +{ + int offset = slice * sseu->ss_stride; + + memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride); +} + DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, i915_cache_sharing_get, i915_cache_sharing_set, "%llu\n"); @@ -3794,12 +3831,13 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; @@ -3845,18 +3883,21 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; + intel_sseu_copy_subslices(&info->sseu, s, + sseu->subslice_mask); for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; + u8 ss_idx = s * info->sseu.ss_stride + + ss / BITS_PER_BYTE; if (IS_GEN9_LP(dev_priv)) { if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; - sseu->subslice_mask[s] |= BIT(ss); + sseu->subslice_mask[ss_idx] |= + BIT(ss % BITS_PER_BYTE); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -3873,25 +3914,23 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO); int s; sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->eu_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice; - for (s = 0; s < fls(sseu->slice_mask); s++) { - sseu->subslice_mask[s] = - RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; - } + sseu->eu_per_subslice = info->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) + intel_sseu_copy_subslices(&info->sseu, s, + 
sseu->subslice_mask); sseu->eu_total = sseu->eu_per_subslice * intel_sseu_subslice_total(sseu); /* subtract fused off EU(s) from enabled slice(s) */ for (s = 0; s < fls(sseu->slice_mask); s++) { - u8 subslice_7eu = - RUNTIME_INFO(dev_priv)->sseu.subslice_7eu[s]; + u8 subslice_7eu = info->sseu.subslice_7eu[s]; sseu->eu_total -= hweight8(subslice_7eu); } @@ -3938,6 +3977,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, static int i915_sseu_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv); struct sseu_dev_info sseu; intel_wakeref_t wakeref; @@ -3945,14 +3985,13 @@ static int i915_sseu_status(struct seq_file *m, void *unused) return -ENODEV; seq_puts(m, "SSEU Device Info\n"); - i915_print_sseu_info(m, true, &RUNTIME_INFO(dev_priv)->sseu); + i915_print_sseu_info(m, true, &info->sseu); seq_puts(m, "SSEU Device Status\n"); memset(&sseu, 0, sizeof(sseu)); - sseu.max_slices = RUNTIME_INFO(dev_priv)->sseu.max_slices; - sseu.max_subslices = RUNTIME_INFO(dev_priv)->sseu.max_subslices; - sseu.max_eus_per_subslice = - RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice; + intel_sseu_set_info(&sseu, info->sseu.max_slices, + info->sseu.max_subslices, + info->sseu.max_eus_per_subslice); with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) { if (IS_CHERRYVIEW(dev_priv)) @@ -3973,13 +4012,12 @@ static int i915_sseu_status(struct seq_file *m, void *unused) static int i915_forcewake_open(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - file->private_data = - (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm); - intel_uncore_forcewake_user_get(&i915->uncore); + atomic_inc(>->user_wakeref); + intel_gt_pm_get(gt); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_get(gt->uncore); return 0; } @@ -3987,13 +4025,12 @@ static int i915_forcewake_open(struct inode *inode, struct file *file) static int i915_forcewake_release(struct inode *inode, struct file *file) { struct drm_i915_private *i915 = inode->i_private; + struct intel_gt *gt = &i915->gt; - if (INTEL_GEN(i915) < 6) - return 0; - - intel_uncore_forcewake_user_put(&i915->uncore); - intel_runtime_pm_put(&i915->runtime_pm, - (intel_wakeref_t)(uintptr_t)file->private_data); + if (INTEL_GEN(i915) >= 6) + intel_uncore_forcewake_user_put(&i915->uncore); + intel_gt_pm_put(gt); + atomic_dec(>->user_wakeref); return 0; } @@ -4339,6 +4376,7 @@ static const struct i915_debugfs_files { const char *name; const struct file_operations *fops; } i915_debugfs_files[] = { + {"i915_perf_noa_delay", &i915_perf_noa_delay_fops}, {"i915_wedged", &i915_wedged_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_gem_drop_caches", &i915_drop_caches_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1c4ff8b5b0a2..157ed22052a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -36,7 +36,6 @@ #include <linux/pm_runtime.h> #include <linux/pnp.h> #include <linux/slab.h> -#include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/vt.h> #include <acpi/video.h> @@ -54,11 +53,11 @@ #include "display/intel_display_types.h" #include "display/intel_dp.h" #include "display/intel_fbdev.h" -#include "display/intel_gmbus.h" #include "display/intel_hotplug.h" #include "display/intel_overlay.h" #include 
"display/intel_pipe_crc.h" #include "display/intel_sprite.h" +#include "display/intel_vga.h" #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" @@ -72,6 +71,7 @@ #include "i915_perf.h" #include "i915_query.h" #include "i915_suspend.h" +#include "i915_switcheroo.h" #include "i915_sysfs.h" #include "i915_trace.h" #include "i915_vgpu.h" @@ -269,159 +269,102 @@ intel_teardown_mchbar(struct drm_i915_private *dev_priv) release_resource(&dev_priv->mch_res); } -/* true = enable decode, false = disable decoder */ -static unsigned int i915_vga_set_decode(void *cookie, bool state) +static int i915_driver_modeset_probe(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = cookie; - - intel_modeset_vga_set_state(dev_priv, state); - if (state) - return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; - else - return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; -} - -static int i915_resume_switcheroo(struct drm_i915_private *i915); -static int i915_suspend_switcheroo(struct drm_i915_private *i915, - pm_message_t state); - -static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) -{ - struct drm_i915_private *i915 = pdev_to_i915(pdev); - pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; - - if (!i915) { - dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); - return; - } - - if (state == VGA_SWITCHEROO_ON) { - pr_info("switched on\n"); - i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; - /* i915 resume handler doesn't set to D0 */ - pci_set_power_state(pdev, PCI_D0); - i915_resume_switcheroo(i915); - i915->drm.switch_power_state = DRM_SWITCH_POWER_ON; - } else { - pr_info("switched off\n"); - i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; - i915_suspend_switcheroo(i915, pmm); - i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF; - } -} - -static bool i915_switcheroo_can_switch(struct pci_dev *pdev) -{ - struct drm_i915_private *i915 = pdev_to_i915(pdev); - - /* - * FIXME: open_count is protected by drm_global_mutex but that would lead to - * locking inversion with the driver load path. And the access here is - * completely racy anyway. So don't bother with locking for now. - */ - return i915 && i915->drm.open_count == 0; -} - -static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { - .set_gpu_state = i915_switcheroo_set_state, - .reprobe = NULL, - .can_switch = i915_switcheroo_can_switch, -}; - -static int i915_driver_modeset_probe(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct pci_dev *pdev = dev_priv->drm.pdev; int ret; - if (i915_inject_probe_failure(dev_priv)) + if (i915_inject_probe_failure(i915)) return -ENODEV; - if (HAS_DISPLAY(dev_priv)) { - ret = drm_vblank_init(&dev_priv->drm, - INTEL_INFO(dev_priv)->num_pipes); + if (HAS_DISPLAY(i915) && INTEL_DISPLAY_ENABLED(i915)) { + ret = drm_vblank_init(&i915->drm, + INTEL_NUM_PIPES(i915)); if (ret) goto out; } - intel_bios_init(dev_priv); + intel_bios_init(i915); - /* If we have > 1 VGA cards, then we need to arbitrate access - * to the common VGA resources. - * - * If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA), - * then we do not take part in VGA arbitration and the - * vga_client_register() fails with -ENODEV. 
- */ - ret = vga_client_register(pdev, dev_priv, NULL, i915_vga_set_decode); - if (ret && ret != -ENODEV) + ret = intel_vga_register(i915); + if (ret) goto out; intel_register_dsm_handler(); - ret = vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); + ret = i915_switcheroo_register(i915); if (ret) goto cleanup_vga_client; /* must happen before intel_power_domains_init_hw() on VLV/CHV */ - intel_update_rawclk(dev_priv); + intel_update_rawclk(i915); - intel_power_domains_init_hw(dev_priv, false); + intel_power_domains_init_hw(i915, false); - intel_csr_ucode_init(dev_priv); + intel_csr_ucode_init(i915); - ret = intel_irq_install(dev_priv); + ret = intel_irq_install(i915); if (ret) goto cleanup_csr; - intel_gmbus_setup(dev_priv); - /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ - ret = intel_modeset_init(dev); + ret = intel_modeset_init(i915); if (ret) goto cleanup_irq; - ret = i915_gem_init(dev_priv); + ret = i915_gem_init(i915); if (ret) goto cleanup_modeset; - intel_overlay_setup(dev_priv); + intel_overlay_setup(i915); - if (!HAS_DISPLAY(dev_priv)) + if (!HAS_DISPLAY(i915) || !INTEL_DISPLAY_ENABLED(i915)) return 0; - ret = intel_fbdev_init(dev); + ret = intel_fbdev_init(&i915->drm); if (ret) goto cleanup_gem; /* Only enable hotplug handling once the fbdev is fully set up. */ - intel_hpd_init(dev_priv); + intel_hpd_init(i915); - intel_init_ipc(dev_priv); + intel_init_ipc(i915); return 0; cleanup_gem: - i915_gem_suspend(dev_priv); - i915_gem_driver_remove(dev_priv); - i915_gem_driver_release(dev_priv); + i915_gem_suspend(i915); + i915_gem_driver_remove(i915); + i915_gem_driver_release(i915); cleanup_modeset: - intel_modeset_driver_remove(dev); + intel_modeset_driver_remove(i915); cleanup_irq: - intel_irq_uninstall(dev_priv); - intel_gmbus_teardown(dev_priv); + intel_irq_uninstall(i915); cleanup_csr: - intel_csr_ucode_fini(dev_priv); - intel_power_domains_driver_remove(dev_priv); - vga_switcheroo_unregister_client(pdev); + intel_csr_ucode_fini(i915); + intel_power_domains_driver_remove(i915); + i915_switcheroo_unregister(i915); cleanup_vga_client: - vga_client_register(pdev, NULL, NULL, NULL); + intel_vga_unregister(i915); out: return ret; } +static void i915_driver_modeset_remove(struct drm_i915_private *i915) +{ + intel_modeset_driver_remove(i915); + + intel_irq_uninstall(i915); + + intel_bios_driver_remove(i915); + + i915_switcheroo_unregister(i915); + + intel_vga_unregister(i915); + + intel_csr_ucode_fini(i915); +} + static void intel_init_dpio(struct drm_i915_private *dev_priv) { /* @@ -576,9 +519,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_gt_init_early(&dev_priv->gt, dev_priv); - ret = i915_gem_init_early(dev_priv); - if (ret < 0) - goto err_gt; + i915_gem_init_early(dev_priv); /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); @@ -600,7 +541,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) err_gem: i915_gem_cleanup_early(dev_priv); -err_gt: intel_gt_driver_late_release(&dev_priv->gt); vlv_free_s0ix_state(dev_priv); err_workqueues: @@ -1135,8 +1075,8 @@ intel_get_dram_info(struct drm_i915_private *dev_priv) static u32 gen9_edram_size_mb(struct drm_i915_private *dev_priv, u32 cap) { - const unsigned int ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; - const unsigned int sets[4] = { 1, 1, 2, 2 }; + static const u8 ways[8] = { 4, 8, 12, 16, 16, 16, 16, 16 }; + static const u8 sets[4] = { 1, 1, 2, 2 }; 
return EDRAM_NUM_BANKS(cap) * ways[EDRAM_WAYS_IDX(cap)] * @@ -1232,7 +1172,7 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (ret) goto err_ggtt; - intel_gt_init_hw(dev_priv); + intel_gt_init_hw_early(dev_priv); ret = i915_ggtt_enable_hw(dev_priv); if (ret) { @@ -1279,9 +1219,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); - /* BIOS often leaves RC6 enabled, but disable it for hw init */ - intel_sanitize_gt_powersave(dev_priv); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -1381,14 +1318,13 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) } else DRM_ERROR("Failed to register driver for userspace access!\n"); - if (HAS_DISPLAY(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) { /* Must be done after probing outputs */ intel_opregion_register(dev_priv); acpi_video_register(); } - if (IS_GEN(dev_priv, 5)) - intel_gpu_ips_init(dev_priv); + intel_gt_driver_register(&dev_priv->gt); intel_audio_init(dev_priv); @@ -1405,7 +1341,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) * We need to coordinate the hotplugs with the asynchronous fbdev * configuration, for which we use the fbdev->async_cookie. */ - if (HAS_DISPLAY(dev_priv)) + if (HAS_DISPLAY(dev_priv) && INTEL_DISPLAY_ENABLED(dev_priv)) drm_kms_helper_poll_init(dev); intel_power_domains_enable(dev_priv); @@ -1431,7 +1367,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ drm_kms_helper_poll_fini(&dev_priv->drm); - intel_gpu_ips_teardown(); + intel_gt_driver_unregister(&dev_priv->gt); acpi_video_unregister(); intel_opregion_unregister(dev_priv); @@ -1560,7 +1496,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret < 0) goto out_cleanup_mmio; - ret = i915_driver_modeset_probe(&dev_priv->drm); + ret = i915_driver_modeset_probe(dev_priv); if (ret < 0) goto out_cleanup_hw; @@ -1575,9 +1511,6 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_cleanup_hw: i915_driver_hw_remove(dev_priv); i915_ggtt_driver_release(dev_priv); - - /* Paranoia: make sure we have disabled everything before we exit. */ - intel_sanitize_gt_powersave(dev_priv); out_cleanup_mmio: i915_driver_mmio_release(dev_priv); out_runtime_pm_put: @@ -1593,8 +1526,6 @@ out_fini: void i915_driver_remove(struct drm_i915_private *i915) { - struct pci_dev *pdev = i915->drm.pdev; - disable_rpm_wakeref_asserts(&i915->runtime_pm); i915_driver_unregister(i915); @@ -1615,14 +1546,7 @@ void i915_driver_remove(struct drm_i915_private *i915) intel_gvt_driver_remove(i915); - intel_modeset_driver_remove(&i915->drm); - - intel_bios_driver_remove(i915); - - vga_switcheroo_unregister_client(pdev); - vga_client_register(pdev, NULL, NULL, NULL); - - intel_csr_ucode_fini(i915); + i915_driver_modeset_remove(i915); /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&i915->gt.hangcheck.work); @@ -1648,9 +1572,6 @@ static void i915_driver_release(struct drm_device *dev) i915_ggtt_driver_release(dev_priv); - /* Paranoia: make sure we have disabled everything before we exit. 
*/ - intel_sanitize_gt_powersave(dev_priv); - i915_driver_mmio_release(dev_priv); enable_rpm_wakeref_asserts(rpm); @@ -1694,12 +1615,10 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - mutex_lock(&dev->struct_mutex); i915_gem_context_close(file); i915_gem_release(dev, file); - mutex_unlock(&dev->struct_mutex); - kfree(file_priv); + kfree_rcu(file_priv, rcu); /* Catch up with all the deferred frees from "this" client */ i915_gem_flush_free_objects(to_i915(dev)); @@ -1854,8 +1773,7 @@ out: return ret; } -static int -i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state) { int error; @@ -1879,7 +1797,7 @@ static int i915_drm_resume(struct drm_device *dev) int ret; disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); - intel_sanitize_gt_powersave(dev_priv); + intel_gt_pm_disable(&dev_priv->gt); i915_gem_sanitize(dev_priv); @@ -1887,6 +1805,9 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) DRM_ERROR("failed to re-enable GGTT\n"); + i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); + intel_csr_ucode_resume(dev_priv); i915_restore_state(dev_priv); @@ -1910,7 +1831,7 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev_priv); - intel_modeset_init_hw(dev); + intel_modeset_init_hw(dev_priv); intel_init_clock_gating(dev_priv); spin_lock_irq(&dev_priv->irq_lock); @@ -2007,7 +1928,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_power_resume_early(dev_priv); - intel_sanitize_gt_powersave(dev_priv); + intel_gt_pm_disable(&dev_priv->gt); intel_power_domains_resume(dev_priv); @@ -2018,7 +1939,7 @@ static int i915_drm_resume_early(struct drm_device *dev) return ret; } -static int i915_resume_switcheroo(struct drm_i915_private *i915) +int i915_resume_switcheroo(struct drm_i915_private *i915) { int ret; @@ -2551,9 +2472,6 @@ static int intel_runtime_suspend(struct device *kdev) struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; int ret = 0; - if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv)))) - return -ENODEV; - if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv))) return -ENODEV; @@ -2586,7 +2504,7 @@ static int intel_runtime_suspend(struct device *kdev) intel_gt_runtime_resume(&dev_priv->gt); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); enable_rpm_wakeref_asserts(rpm); @@ -2666,7 +2584,7 @@ static int intel_runtime_resume(struct device *kdev) * we can do is to hope that things will still work (and disable RPM). 
*/ intel_gt_runtime_resume(&dev_priv->gt); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); /* * On VLV/CHV display interrupts are part of the display diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 772154e4073e..8882c0908c3b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -67,6 +67,7 @@ #include "display/intel_display.h" #include "display/intel_display_power.h" #include "display/intel_dpll_mgr.h" +#include "display/intel_dsb.h" #include "display/intel_frontbuffer.h" #include "display/intel_gmbus.h" #include "display/intel_opregion.h" @@ -84,6 +85,7 @@ #include "intel_device_info.h" #include "intel_pch.h" #include "intel_runtime_pm.h" +#include "intel_memory_region.h" #include "intel_uncore.h" #include "intel_wakeref.h" #include "intel_wopcm.h" @@ -92,6 +94,7 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_gtt.h" #include "i915_gpu_error.h" +#include "i915_perf_types.h" #include "i915_request.h" #include "i915_scheduler.h" #include "gt/intel_timeline.h" @@ -105,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190822" -#define DRIVER_TIMESTAMP 1566477988 +#define DRIVER_DATE "20191021" +#define DRIVER_TIMESTAMP 1571651766 struct drm_i915_gem_object; @@ -185,7 +188,11 @@ struct i915_mmu_object; struct drm_i915_file_private { struct drm_i915_private *dev_priv; - struct drm_file *file; + + union { + struct drm_file *file; + struct rcu_head rcu; + }; struct { spinlock_t lock; @@ -272,6 +279,7 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct intel_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct intel_atomic_state *state); + u8 (*calc_voltage_level)(int cdclk); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. 
*/ bool (*get_pipe_config)(struct intel_crtc *, @@ -284,7 +292,8 @@ struct drm_i915_display_funcs { struct intel_atomic_state *old_state); void (*crtc_disable)(struct intel_crtc_state *old_crtc_state, struct intel_atomic_state *old_state); - void (*update_crtcs)(struct intel_atomic_state *state); + void (*commit_modeset_enables)(struct intel_atomic_state *state); + void (*commit_modeset_disables)(struct intel_atomic_state *state); void (*audio_codec_enable)(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); @@ -331,6 +340,7 @@ struct intel_csr { i915_reg_t mmioaddr[20]; u32 mmiodata[20]; u32 dc_state; + u32 target_dc_state; u32 allowed_dc_mask; intel_wakeref_t wakeref; }; @@ -479,6 +489,7 @@ struct i915_psr { bool enabled; struct intel_dp *dp; enum pipe pipe; + enum transcoder transcoder; bool active; struct work_struct work; unsigned busy_frontbuffer_bits; @@ -492,6 +503,9 @@ struct i915_psr { bool sink_not_reliable; bool irq_aux_error; u16 su_x_granularity; + bool dc3co_enabled; + u32 dc3co_exit_delay; + struct delayed_work idle_work; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) @@ -591,20 +605,8 @@ struct intel_rps { struct intel_rps_ei ei; }; -struct intel_rc6 { - bool enabled; - u64 prev_hw_residency[4]; - u64 cur_residency[4]; -}; - -struct intel_llc_pstate { - bool enabled; -}; - struct intel_gen6_power_mgmt { struct intel_rps rps; - struct intel_rc6 rc6; - struct intel_llc_pstate llc_pstate; }; /* defined intel_pm.c */ @@ -677,6 +679,8 @@ struct i915_gem_mm { */ struct vfsmount *gemfs; + struct intel_memory_region *regions[INTEL_REGION_UNKNOWN]; + struct notifier_block oom_notifier; struct notifier_block vmap_notifier; struct shrinker shrinker; @@ -688,11 +692,6 @@ struct i915_gem_mm { */ struct workqueue_struct *userptr_wq; - /** Bit 6 swizzling required for X tiling */ - u32 bit_6_swizzle_x; - /** Bit 6 swizzling required for Y tiling */ - u32 bit_6_swizzle_y; - /* shrinker accounting, also useful for userland debugging */ u64 shrink_memory; u32 shrink_count; @@ -973,305 +972,6 @@ struct intel_wm_config { bool sprites_scaled; }; -struct i915_oa_format { - u32 format; - int size; -}; - -struct i915_oa_reg { - i915_reg_t addr; - u32 value; -}; - -struct i915_oa_config { - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct i915_oa_reg *mux_regs; - u32 mux_regs_len; - const struct i915_oa_reg *b_counter_regs; - u32 b_counter_regs_len; - const struct i915_oa_reg *flex_regs; - u32 flex_regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct device_attribute sysfs_metric_id; - - atomic_t ref_count; -}; - -struct i915_perf_stream; - -/** - * struct i915_perf_stream_ops - the OPs to support a specific stream type - */ -struct i915_perf_stream_ops { - /** - * @enable: Enables the collection of HW samples, either in response to - * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened - * without `I915_PERF_FLAG_DISABLED`. - */ - void (*enable)(struct i915_perf_stream *stream); - - /** - * @disable: Disables the collection of HW samples, either in response - * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying - * the stream. 
- */ - void (*disable)(struct i915_perf_stream *stream); - - /** - * @poll_wait: Call poll_wait, passing a wait queue that will be woken - * once there is something ready to read() for the stream - */ - void (*poll_wait)(struct i915_perf_stream *stream, - struct file *file, - poll_table *wait); - - /** - * @wait_unlocked: For handling a blocking read, wait until there is - * something to ready to read() for the stream. E.g. wait on the same - * wait queue that would be passed to poll_wait(). - */ - int (*wait_unlocked)(struct i915_perf_stream *stream); - - /** - * @read: Copy buffered metrics as records to userspace - * **buf**: the userspace, destination buffer - * **count**: the number of bytes to copy, requested by userspace - * **offset**: zero at the start of the read, updated as the read - * proceeds, it represents how many bytes have been copied so far and - * the buffer offset for copying the next record. - * - * Copy as many buffered i915 perf samples and records for this stream - * to userspace as will fit in the given buffer. - * - * Only write complete records; returning -%ENOSPC if there isn't room - * for a complete record. - * - * Return any error condition that results in a short read such as - * -%ENOSPC or -%EFAULT, even though these may be squashed before - * returning to userspace. - */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @destroy: Cleanup any stream specific resources. - * - * The stream will always be disabled before this is called. - */ - void (*destroy)(struct i915_perf_stream *stream); -}; - -/** - * struct i915_perf_stream - state for a single open stream FD - */ -struct i915_perf_stream { - /** - * @dev_priv: i915 drm device - */ - struct drm_i915_private *dev_priv; - - /** - * @link: Links the stream into ``&drm_i915_private->streams`` - */ - struct list_head link; - - /** - * @wakeref: As we keep the device awake while the perf stream is - * active, we track our runtime pm reference for later release. - */ - intel_wakeref_t wakeref; - - /** - * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` - * properties given when opening a stream, representing the contents - * of a single sample as read() by userspace. - */ - u32 sample_flags; - - /** - * @sample_size: Considering the configured contents of a sample - * combined with the required header size, this is the total size - * of a single sample record. - */ - int sample_size; - - /** - * @ctx: %NULL if measuring system-wide across all contexts or a - * specific context that is being monitored. - */ - struct i915_gem_context *ctx; - - /** - * @enabled: Whether the stream is currently enabled, considering - * whether the stream was opened in a disabled state and based - * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. - */ - bool enabled; - - /** - * @ops: The callbacks providing the implementation of this specific - * type of configured stream. - */ - const struct i915_perf_stream_ops *ops; - - /** - * @oa_config: The OA configuration used by the stream. - */ - struct i915_oa_config *oa_config; - - /** - * The OA context specific information. - */ - struct intel_context *pinned_ctx; - u32 specific_ctx_id; - u32 specific_ctx_id_mask; - - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - bool pollin; - - bool periodic; - int period_exponent; - - /** - * State of the OA buffer. 
- */ - struct { - struct i915_vma *vma; - u8 *vaddr; - u32 last_ctx_id; - int format; - int format_size; - int size_exponent; - - /** - * Locks reads and writes to all head/tail state - * - * Consider: the head and tail pointer state needs to be read - * consistently from a hrtimer callback (atomic context) and - * read() fop (user context) with tail pointer updates happening - * in atomic context and head updates in user context and the - * (unlikely) possibility of read() errors needing to reset all - * head/tail state. - * - * Note: Contention/performance aren't currently a significant - * concern here considering the relatively low frequency of - * hrtimer callbacks (5ms period) and that reads typically only - * happen in response to a hrtimer event and likely complete - * before the next callback. - * - * Note: This lock is not held *while* reading and copying data - * to userspace so the value of head observed in htrimer - * callbacks won't represent any partial consumption of data. - */ - spinlock_t ptr_lock; - - /** - * One 'aging' tail pointer and one 'aged' tail pointer ready to - * used for reading. - * - * Initial values of 0xffffffff are invalid and imply that an - * update is required (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * Index for the aged tail ready to read() data up to. - */ - unsigned int aged_tail_idx; - - /** - * A monotonic timestamp for when the current aging tail pointer - * was read; used to determine when it is old enough to trust. - */ - u64 aging_timestamp; - - /** - * Although we can always read back the head pointer register, - * we prefer to avoid trusting the HW state, just to avoid any - * risk that some hardware condition could * somehow bump the - * head pointer unpredictably and cause us to forward the wrong - * OA buffer data to userspace. - */ - u32 head; - } oa_buffer; -}; - -/** - * struct i915_oa_ops - Gen specific implementation of an OA unit stream - */ -struct i915_oa_ops { - /** - * @is_valid_b_counter_reg: Validates register's address for - * programming boolean counters for a particular platform. - */ - bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv, - u32 addr); - - /** - * @is_valid_mux_reg: Validates register's address for programming mux - * for a particular platform. - */ - bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @is_valid_flex_reg: Validates register's address for programming - * flex EU filtering for a particular platform. - */ - bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr); - - /** - * @enable_metric_set: Selects and applies any MUX configuration to set - * up the Boolean and Custom (B/C) counters that are part of the - * counter reports being sampled. May apply system constraints such as - * disabling EU clock gating as required. - */ - int (*enable_metric_set)(struct i915_perf_stream *stream); - - /** - * @disable_metric_set: Remove system constraints associated with using - * the OA unit. - */ - void (*disable_metric_set)(struct i915_perf_stream *stream); - - /** - * @oa_enable: Enable periodic sampling - */ - void (*oa_enable)(struct i915_perf_stream *stream); - - /** - * @oa_disable: Disable periodic sampling - */ - void (*oa_disable)(struct i915_perf_stream *stream); - - /** - * @read: Copy data from the circular OA buffer into a given userspace - * buffer. 
- */ - int (*read)(struct i915_perf_stream *stream, - char __user *buf, - size_t count, - size_t *offset); - - /** - * @oa_hw_tail_read: read the OA tail pointer register - * - * In particular this enables us to share all the fiddly code for - * handling the OA unit tail pointer race that affects multiple - * generations. - */ - u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); -}; - struct intel_cdclk_state { unsigned int cdclk, vco, ref, bypass; u8 voltage_level; @@ -1331,11 +1031,11 @@ struct drm_i915_private { */ u32 gpio_mmio_base; + u32 hsw_psr_mmio_adjust; + /* MMIO base address for MIPI regs */ u32 mipi_mmio_base; - u32 psr_mmio_base; - u32 pps_mmio_base; wait_queue_head_t gmbus_wait_queue; @@ -1414,6 +1114,9 @@ struct drm_i915_private { /* The current hardware cdclk state */ struct intel_cdclk_state hw; + /* cdclk, divider, and ratio table from bspec */ + const struct intel_cdclk_vals *table; + int force_min_cdclk; } cdclk; @@ -1428,6 +1131,8 @@ struct drm_i915_private { /* ordered wq for modesets */ struct workqueue_struct *modeset_wq; + /* unbound hipri wq for page flips/plane updates */ + struct workqueue_struct *flip_wq; /* Display functions */ struct drm_i915_display_funcs display; @@ -1468,7 +1173,7 @@ struct drm_i915_private { */ struct mutex dpll_lock; - unsigned int active_crtcs; + u8 active_pipes; /* minimum acceptable cdclk for each pipe */ int min_cdclk[I915_MAX_PIPES]; /* minimum acceptable voltage level for each pipe */ @@ -1528,25 +1233,7 @@ struct drm_i915_private { */ struct mutex av_mutex; int audio_power_refcount; - - struct { - struct mutex mutex; - struct list_head list; - struct llist_head free_list; - struct work_struct free_work; - - /* The hw wants to have a stable context identifier for the - * lifetime of the context (for OA, PASID, faults, etc). - * This is limited in execlists to 21 bits. - */ - struct ida hw_ida; -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ -/* in Gen12 ID 0x7FF is reserved to indicate idle */ -#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) - struct list_head hw_id_list; - } contexts; + u32 audio_freq_cntrl; u32 fdi_rx_config; @@ -1572,6 +1259,8 @@ struct drm_i915_private { I915_SAGV_NOT_CONTROLLED } sagv_status; + u32 sagv_block_time_us; + struct { /* * Raw watermark latency values: @@ -1642,61 +1331,7 @@ struct drm_i915_private { struct intel_runtime_pm runtime_pm; - struct { - bool initialized; - - struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; - - /* - * Lock associated with adding/modifying/removing OA configs - * in dev_priv->perf.metrics_idr. - */ - struct mutex metrics_lock; - - /* - * List of dynamic configurations, you need to hold - * dev_priv->perf.metrics_lock to access it. - */ - struct idr metrics_idr; - - /* - * Lock associated with anything below within this structure - * except exclusive_stream. - */ - struct mutex lock; - struct list_head streams; - - /* - * The stream currently using the OA unit. If accessed - * outside a syscall associated to its file - * descriptor, you need to hold - * dev_priv->drm.struct_mutex. 
- */ - struct i915_perf_stream *exclusive_stream; - - /** - * For rate limiting any notifications of spurious - * invalid OA reports - */ - struct ratelimit_state spurious_report_rs; - - struct i915_oa_config test_config; - - u32 gen7_latched_oastatus1; - u32 ctx_oactxctrl_offset; - u32 ctx_flexeu0_offset; - - /** - * The RPT_ID/reason field for Gen8+ includes a bit - * to determine if the CTX ID in the report is valid - * but the specific bit differs between Gen 8 and 9 - */ - u32 gen8_valid_ctx_bit; - - struct i915_oa_ops ops; - const struct i915_oa_format *oa_formats; - } perf; + struct i915_perf perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt; @@ -1704,32 +1339,17 @@ struct drm_i915_private { struct { struct notifier_block pm_notifier; - /** - * We leave the user IRQ off as much as possible, - * but this means that requests will finish and never - * be retired once the system goes idle. Set a timer to - * fire periodically while the ring is running. When it - * fires, go retire requests. - */ - struct delayed_work retire_work; - - /** - * When we detect an idle GPU, we want to turn on - * powersaving features. So once we see that there - * are no more requests outstanding and no more - * arrive within a small period of time, we fire - * off the idle_work. - */ - struct work_struct idle_work; + struct i915_gem_contexts { + spinlock_t lock; /* locks list */ + struct list_head list; + + struct llist_head free_list; + struct work_struct free_work; + } contexts; } gem; - /* For i945gm vblank irq vs. C3 workaround */ - struct { - struct work_struct work; - struct pm_qos_request pm_qos; - u8 c3_disable_latency; - u8 enabled; - } i945gm_vblank; + /* For i915gm/i945gm vblank irq workaround */ + u8 vblank_enabled; /* perform PHY state sanity checks? */ bool chv_phy_assert[2]; @@ -1792,10 +1412,10 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) for_each_if ((engine__) = (dev_priv__)->engine[(id__)]) /* Iterator over subset of engines selected by mask */ -#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->engine_mask; \ +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ + for ((tmp__) = (mask__) & INTEL_INFO((gt__)->i915)->engine_mask; \ (tmp__) ? \ - ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ + ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ 0;) #define rb_to_uabi_engine(rb) \ @@ -1851,6 +1471,8 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(n)) + \ INTEL_INFO(dev_priv)->gen == (n)) +#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb) + /* * Return true if revision is in range [since,until] inclusive. 
* @@ -2056,6 +1678,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_REVID(p, since, until) \ (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define TGL_REVID_A0 0x0 + +#define IS_TGL_REVID(p, since, until) \ + (IS_TIGERLAKE(p) && IS_REVID(p, since, until)) + #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) && !IS_LP(dev_priv)) @@ -2153,6 +1780,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) +#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) + #define HAS_GT_UC(dev_priv) (INTEL_INFO(dev_priv)->has_gt_uc) /* Having GuC is not the same as using GuC */ @@ -2176,7 +1805,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 -#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->num_pipes > 0) +#define INTEL_NUM_PIPES(dev_priv) (hweight8(INTEL_INFO(dev_priv)->pipe_mask)) + +#define HAS_DISPLAY(dev_priv) (INTEL_INFO(dev_priv)->pipe_mask != 0) + +/* Only valid when HAS_DISPLAY() is true */ +#define INTEL_DISPLAY_ENABLED(dev_priv) (WARN_ON(!HAS_DISPLAY(dev_priv)), !i915_modparams.disable_display) static inline bool intel_vtd_active(void) { @@ -2209,6 +1843,9 @@ extern const struct dev_pm_ops i915_pm_ops; int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent); void i915_driver_remove(struct drm_i915_private *i915); +int i915_resume_switcheroo(struct drm_i915_private *i915); +int i915_suspend_switcheroo(struct drm_i915_private *i915, pm_message_t state); + void intel_engine_init_hangcheck(struct intel_engine_cs *engine); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); @@ -2229,11 +1866,13 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, int i915_gem_init_userptr(struct drm_i915_private *dev_priv); void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv); void i915_gem_sanitize(struct drm_i915_private *i915); -int i915_gem_init_early(struct drm_i915_private *dev_priv); +void i915_gem_init_early(struct drm_i915_private *dev_priv); void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); +struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915); + static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) { /* @@ -2312,13 +1951,10 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); -int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); @@ -2358,7 +1994,7 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) /* i915_gem_evict.c */ int __must_check 
i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags); int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, @@ -2366,6 +2002,9 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915); +int i915_gem_init_memory_regions(struct drm_i915_private *i915); + /* i915_gem_internal.c */ struct drm_i915_gem_object * i915_gem_object_create_internal(struct drm_i915_private *dev_priv, @@ -2374,9 +2013,9 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv, /* i915_gem_tiling.c */ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *i915 = to_i915(obj->base.dev); - return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && i915_gem_object_is_tiled(obj); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 814f62fca727..dd0a3271b4e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -45,7 +45,6 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_pm.h" -#include "gem/i915_gemfs.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" @@ -62,20 +61,31 @@ #include "intel_pm.h" static int -insert_mappable_node(struct i915_ggtt *ggtt, - struct drm_mm_node *node, u32 size) +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { + int err; + + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; + memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, - size, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + + mutex_unlock(&ggtt->vm.mutex); + + return err; } static void -remove_mappable_node(struct drm_mm_node *node) +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } int @@ -87,7 +97,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - mutex_lock(&ggtt->vm.mutex); + if (mutex_lock_interruptible(&ggtt->vm.mutex)) + return -EINTR; pinned = ggtt->vm.reserved; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) @@ -109,20 +120,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, LIST_HEAD(still_in_list); int ret = 0; - lockdep_assert_held(&obj->base.dev->struct_mutex); - spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { + struct i915_address_space *vm = vma->vm; + + ret = -EBUSY; + if (!i915_vm_tryopen(vm)) + break; + list_move_tail(&vma->obj_link, &still_in_list); spin_unlock(&obj->vma.lock); - ret = -EBUSY; if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || !i915_vma_is_active(vma)) ret = i915_vma_unbind(vma); + i915_vm_close(vm); spin_lock(&obj->vma.lock); } list_splice(&still_in_list, &obj->vma.list); @@ -338,10 +353,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, u64 
remain, offset; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); vma = ERR_PTR(-ENODEV); if (!i915_gem_object_is_tiled(obj)) @@ -355,12 +366,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, } else { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -414,17 +423,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return ret; } @@ -531,10 +537,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, void __user *user_data; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -544,10 +546,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * using the cache bypass of indirect GGTT access. */ wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (!wakeref) { - ret = -EFAULT; - goto out_unlock; - } + if (!wakeref) + return -EFAULT; } else { /* No backing pages, no fallback, we must force GGTT access */ wakeref = intel_runtime_pm_get(rpm); @@ -569,8 +569,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -634,18 +632,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); intel_gt_flush_ggtt_writes(ggtt->vm.gt); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } out_rpm: intel_runtime_pm_put(rpm, wakeref); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -887,74 +882,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static long -wait_for_timelines(struct drm_i915_private *i915, - unsigned int wait, long timeout) -{ - struct intel_gt_timelines *timelines = &i915->gt.timelines; - struct intel_timeline *tl; - unsigned long flags; - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; - - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) - continue; - - spin_unlock_irqrestore(&timelines->lock, flags); - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. 
- */ - if (wait & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); - - timeout = i915_request_wait(rq, wait, timeout); - i915_request_put(rq); - if (timeout < 0) - return timeout; - - /* restart after reacquiring the lock */ - spin_lock_irqsave(&timelines->lock, flags); - tl = list_entry(&timelines->active_list, typeof(*tl), link); - } - spin_unlock_irqrestore(&timelines->lock, flags); - - return timeout; -} - -int i915_gem_wait_for_idle(struct drm_i915_private *i915, - unsigned int flags, long timeout) -{ - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(&i915->gt)) - return 0; - - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); - - timeout = wait_for_timelines(i915, flags, timeout); - if (timeout < 0) - return timeout; - - if (flags & I915_WAIT_LOCKED) { - lockdep_assert_held(&i915->drm.struct_mutex); - - i915_retire_requests(i915); - } - - return 0; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, @@ -967,7 +894,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); + if (i915_gem_object_never_bind_ggtt(obj)) + return ERR_PTR(-ENODEV); if (flags & PIN_MAPPABLE && (!view || view->type == I915_GGTT_VIEW_NORMAL)) { @@ -1015,13 +943,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); } - WARN(i915_vma_is_pinned(vma), - "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x, req.alignment=%llx," - " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", - i915_ggtt_offset(vma), alignment, - !!(flags & PIN_MAPPABLE), - i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); if (ret) return ERR_PTR(ret); @@ -1148,95 +1069,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) intel_runtime_pm_put(&i915->runtime_pm, wakeref); } -static void init_unused_ring(struct intel_gt *gt, u32 base) -{ - struct intel_uncore *uncore = gt->uncore; - - intel_uncore_write(uncore, RING_CTL(base), 0); - intel_uncore_write(uncore, RING_HEAD(base), 0); - intel_uncore_write(uncore, RING_TAIL(base), 0); - intel_uncore_write(uncore, RING_START(base), 0); -} - -static void init_unused_rings(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - - if (IS_I830(i915)) { - init_unused_ring(gt, PRB1_BASE); - init_unused_ring(gt, SRB0_BASE); - init_unused_ring(gt, SRB1_BASE); - init_unused_ring(gt, SRB2_BASE); - init_unused_ring(gt, SRB3_BASE); - } else if (IS_GEN(i915, 2)) { - init_unused_ring(gt, SRB0_BASE); - init_unused_ring(gt, SRB1_BASE); - } else if (IS_GEN(i915, 3)) { - init_unused_ring(gt, PRB1_BASE); - init_unused_ring(gt, PRB2_BASE); - } -} - -int i915_gem_init_hw(struct drm_i915_private *i915) -{ - struct intel_uncore *uncore = &i915->uncore; - struct intel_gt *gt = &i915->gt; - int ret; - - BUG_ON(!i915->kernel_context); - ret = intel_gt_terminally_wedged(gt); - if (ret) - return ret; - - gt->last_init_time = ktime_get(); - - /* Double layer security blanket, see i915_gem_init() */ - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9) - intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf)); - - if (IS_HASWELL(i915)) - intel_uncore_write(uncore, - MI_PREDICATE_RESULT_2, - IS_HSW_GT3(i915) ? 
- LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED); - - /* Apply the GT workarounds... */ - intel_gt_apply_workarounds(gt); - /* ...and determine whether they are sticking. */ - intel_gt_verify_workarounds(gt, "init"); - - intel_gt_init_swizzling(gt); - - /* - * At least 830 can leave some of the unused rings - * "active" (ie. head != tail) after resume which - * will prevent c3 entry. Makes sure all unused rings - * are totally idle. - */ - init_unused_rings(gt); - - ret = i915_ppgtt_init_hw(gt); - if (ret) { - DRM_ERROR("Enabling PPGTT failed (%d)\n", ret); - goto out; - } - - /* We can't enable contexts until all firmware is loaded */ - ret = intel_uc_init_hw(>->uc); - if (ret) { - i915_probe_error(i915, "Enabling uc failed (%d)\n", ret); - goto out; - } - - intel_mocs_init(gt); - -out: - intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL); - return ret; -} - static int __intel_engines_record_defaults(struct drm_i915_private *i915) { struct i915_request *requests[I915_NUM_ENGINES] = {}; @@ -1278,15 +1110,6 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (err) goto err_rq; - /* - * Failing to program the MOCS is non-fatal.The system will not - * run at peak performance. So warn the user and carry on. - */ - err = intel_mocs_emit(rq); - if (err) - dev_notice(i915->drm.dev, - "Failed to program MOCS registers; expect performance issues.\n"); - err = intel_renderstate_emit(rq); if (err) goto err_rq; @@ -1375,17 +1198,6 @@ out: return err; } -static int -i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size) -{ - return intel_gt_init_scratch(&i915->gt, size); -} - -static void i915_gem_fini_scratch(struct drm_i915_private *i915) -{ - intel_gt_fini_scratch(&i915->gt); -} - static int intel_engines_verify_workarounds(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -1427,7 +1239,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * we hold the forcewake during initialisation these problems * just magically go away. */ - mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); ret = i915_init_ggtt(dev_priv); @@ -1436,12 +1247,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_init_scratch(dev_priv, - IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE); - if (ret) { - GEM_BUG_ON(ret == -EIO); - goto err_ggtt; - } + intel_gt_init(&dev_priv->gt); ret = intel_engines_setup(dev_priv); if (ret) { @@ -1449,7 +1255,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_init_contexts(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_scratch; @@ -1465,7 +1271,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uc_init(&dev_priv->gt.uc); - ret = i915_gem_init_hw(dev_priv); + ret = intel_gt_init_hw(&dev_priv->gt); if (ret) goto err_uc_init; @@ -1502,7 +1308,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_gt; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; @@ -1513,32 +1318,25 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * driver doesn't explode during runtime. 
*/ err_gt: - mutex_unlock(&dev_priv->drm.struct_mutex); - - intel_gt_set_wedged(&dev_priv->gt); + intel_gt_set_wedged_on_init(&dev_priv->gt); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); i915_gem_drain_workqueue(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); err_init_hw: intel_uc_fini_hw(&dev_priv->gt.uc); err_uc_init: if (ret != -EIO) { intel_uc_fini(&dev_priv->gt.uc); - intel_cleanup_gt_powersave(dev_priv); intel_engines_cleanup(dev_priv); } err_context: if (ret != -EIO) - i915_gem_contexts_fini(dev_priv); + i915_gem_driver_release__contexts(dev_priv); err_scratch: - i915_gem_fini_scratch(dev_priv); -err_ggtt: + intel_gt_driver_release(&dev_priv->gt); err_unlock: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); if (ret != -EIO) { intel_uc_cleanup_firmwares(&dev_priv->gt.uc); @@ -1547,8 +1345,6 @@ err_unlock: } if (ret == -EIO) { - mutex_lock(&dev_priv->drm.struct_mutex); - /* * Allow engines or uC initialisation to fail by marking the GPU * as wedged. But we only want to do this when the GPU is angry, @@ -1563,10 +1359,8 @@ err_unlock: /* Minimal basic recovery for KMS */ ret = i915_ggtt_enable_hw(dev_priv); i915_gem_restore_gtt_mappings(dev_priv); - i915_gem_restore_fences(dev_priv); + i915_gem_restore_fences(&dev_priv->ggtt); intel_init_clock_gating(dev_priv); - - mutex_unlock(&dev_priv->drm.struct_mutex); } i915_gem_drain_freed_objects(dev_priv); @@ -1587,43 +1381,35 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915) void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - GEM_BUG_ON(dev_priv->gt.awake); - intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); i915_gem_suspend_late(dev_priv); - intel_disable_gt_powersave(dev_priv); + intel_gt_driver_remove(&dev_priv->gt); /* Flush any outstanding unpin_work. 
*/ i915_gem_drain_workqueue(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(&dev_priv->gt.uc); intel_uc_fini(&dev_priv->gt.uc); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_drain_freed_objects(dev_priv); } void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); intel_engines_cleanup(dev_priv); - i915_gem_contexts_fini(dev_priv); - i915_gem_fini_scratch(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_driver_release__contexts(dev_priv); + intel_gt_driver_release(&dev_priv->gt); intel_wa_list_free(&dev_priv->gt_wa_list); - intel_cleanup_gt_powersave(dev_priv); - intel_uc_cleanup_firmwares(&dev_priv->gt.uc); i915_gem_cleanup_userptr(dev_priv); intel_timelines_fini(dev_priv); i915_gem_drain_freed_objects(dev_priv); - WARN_ON(!list_empty(&dev_priv->contexts.list)); + WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } void i915_gem_init_mmio(struct drm_i915_private *i915) @@ -1643,20 +1429,12 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) i915_gem_init__objects(i915); } -int i915_gem_init_early(struct drm_i915_private *dev_priv) +void i915_gem_init_early(struct drm_i915_private *dev_priv) { - int err; - i915_gem_init__mm(dev_priv); i915_gem_init__pm(dev_priv); spin_lock_init(&dev_priv->fb_tracking.lock); - - err = i915_gemfs_init(dev_priv); - if (err) - DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); - - return 0; } void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) @@ -1665,8 +1443,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.shrink_count); - - i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 167a7b56ed5b..2011f8e9a9f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -37,10 +37,8 @@ struct drm_i915_private; #define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ - pr_err("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ - GEM_TRACE("%s:%d GEM_BUG_ON(%s)\n", \ - __func__, __LINE__, __stringify(condition)); \ + GEM_TRACE_ERR("%s:%d GEM_BUG_ON(%s)\n", \ + __func__, __LINE__, __stringify(condition)); \ BUG(); \ } \ } while(0) @@ -66,17 +64,33 @@ struct drm_i915_private; #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#define GEM_TRACE_ERR(...) do { \ + pr_err(__VA_ARGS__); \ + trace_printk(__VA_ARGS__); \ +} while (0) #define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) #define GEM_TRACE_DUMP_ON(expr) \ do { if (expr) ftrace_dump(DUMP_ALL); } while (0) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_ERR(...) 
do { } while (0) #define GEM_TRACE_DUMP() do { } while (0) #define GEM_TRACE_DUMP_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif #define I915_GEM_IDLE_TIMEOUT (HZ / 5) +static inline void tasklet_lock(struct tasklet_struct *t) +{ + while (!tasklet_trylock(t)) + cpu_relax(); +} + +static inline bool tasklet_is_locked(const struct tasklet_struct *t) +{ + return test_bit(TASKLET_STATE_RUN, &t->state); +} + static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) @@ -98,4 +112,18 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) return test_bit(TASKLET_STATE_SCHED, &t->state); } +static inline void cancel_timer(struct timer_list *t) +{ + if (!READ_ONCE(t->expires)) + return; + + del_timer(t); + WRITE_ONCE(t->expires, 0); +} + +static inline bool timer_expired(const struct timer_list *t) +{ + return READ_ONCE(t->expires) && !timer_pending(t); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 7abcac3b5e2e..7e62c310290f 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -29,6 +29,7 @@ #include <drm/i915_drm.h> #include "gem/i915_gem_context.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_trace.h" @@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static int ggtt_flush(struct drm_i915_private *i915) +static int ggtt_flush(struct intel_gt *gt) { /* * Not everything in the GGTT is tracked via vma (otherwise we @@ -46,10 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); + return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } static bool @@ -70,7 +68,7 @@ mark_free(struct drm_mm_scan *scan, * @vm: address space to evict from * @min_size: size of the desired free space * @alignment: alignment constraint of the desired free space - * @cache_level: cache_level for the desired space + * @color: color for the desired space * @start: start (inclusive) of the range from which to evict objects * @end: end (exclusive) of the range from which to evict objects * @flags: additional flags to control the eviction algorithm @@ -91,11 +89,10 @@ mark_free(struct drm_mm_scan *scan, int i915_gem_evict_something(struct i915_address_space *vm, u64 min_size, u64 alignment, - unsigned cache_level, + unsigned long color, u64 start, u64 end, unsigned flags) { - struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; struct i915_vma *vma, *next; @@ -104,7 +101,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *active; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -124,17 +121,10 @@ i915_gem_evict_something(struct i915_address_space *vm, if (flags & PIN_MAPPABLE) mode = DRM_MM_INSERT_LOW; drm_mm_scan_init_with_range(&scan, &vm->mm, - min_size, alignment, cache_level, + min_size, alignment, color, start, end, mode); - /* - * Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. 
- */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(dev_priv); + intel_gt_retire_requests(vm->gt); search_again: active = NULL; @@ -207,7 +197,7 @@ search_again: if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(dev_priv); + ret = ggtt_flush(vm->gt); if (ret) return ret; @@ -235,12 +225,12 @@ found: list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) { vma = container_of(node, struct i915_vma, node); - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -266,25 +256,23 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, u64 start = target->start; u64 end = start + target->size; struct i915_vma *vma, *next; - bool check_color; int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); trace_i915_gem_evict_node(vm, target, flags); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); - check_color = vm->mm.color_adjust; - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ if (start) start -= I915_GTT_PAGE_SIZE; @@ -310,7 +298,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * abutt and conflict. If they are in conflict, then we evict * those as well to make room for our guard pages. */ - if (check_color) { + if (i915_vm_has_cache_coloring(vm)) { if (node->start + node->size == target->start) { if (node->color == target->color) continue; @@ -351,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -375,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) struct i915_vma *vma, *next; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict_vm(vm); /* Switch back to the default context in order to unpin @@ -384,13 +372,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm) * switch otherwise is ineffective. 
*/ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->i915); + ret = ggtt_flush(vm->gt); if (ret) return ret; } INIT_LIST_HEAD(&eviction_list); - mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -398,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } - mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 615a9f4ef30c..321189e1b0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -59,6 +59,16 @@ #define pipelined 0 +static struct drm_i915_private *fence_to_i915(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.i915; +} + +static struct intel_uncore *fence_to_uncore(struct i915_fence_reg *fence) +{ + return fence->ggtt->vm.gt->uncore; +} + static void i965_write_fence_reg(struct i915_fence_reg *fence, struct i915_vma *vma) { @@ -66,7 +76,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, int fence_pitch_shift; u64 val; - if (INTEL_GEN(fence->i915) >= 6) { + if (INTEL_GEN(fence_to_i915(fence)) >= 6) { fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; @@ -95,7 +105,7 @@ static void i965_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); /* * To w/a incoherency with non-atomic 64-bit register updates, @@ -132,7 +142,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!IS_ALIGNED(vma->node.start, vma->fence_size)); - if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence_to_i915(fence))) stride /= 128; else stride /= 512; @@ -148,7 +158,7 @@ static void i915_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -180,7 +190,7 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, } if (!pipelined) { - struct intel_uncore *uncore = &fence->i915->uncore; + struct intel_uncore *uncore = fence_to_uncore(fence); i915_reg_t reg = FENCE_REG(fence->id); intel_uncore_write_fw(uncore, reg, val); @@ -191,15 +201,17 @@ static void i830_write_fence_reg(struct i915_fence_reg *fence, static void fence_write(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct drm_i915_private *i915 = fence_to_i915(fence); + /* * Previous access through the fence register is marshalled by * the mb() inside the fault handlers (i915_gem_release_mmaps) * and explicitly managed for internal users. 
*/ - if (IS_GEN(fence->i915, 2)) + if (IS_GEN(i915, 2)) i830_write_fence_reg(fence, vma); - else if (IS_GEN(fence->i915, 3)) + else if (IS_GEN(i915, 3)) i915_write_fence_reg(fence, vma); else i965_write_fence_reg(fence, vma); @@ -215,6 +227,8 @@ static void fence_write(struct i915_fence_reg *fence, static int fence_update(struct i915_fence_reg *fence, struct i915_vma *vma) { + struct i915_ggtt *ggtt = fence->ggtt; + struct intel_uncore *uncore = fence_to_uncore(fence); intel_wakeref_t wakeref; struct i915_vma *old; int ret; @@ -230,14 +244,15 @@ static int fence_update(struct i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_active_wait(&vma->active); + ret = i915_vma_sync(vma); if (ret) return ret; } old = xchg(&fence->vma, NULL); if (old) { - ret = i915_active_wait(&old->active); + /* XXX Ideally we would move the waiting to outside the mutex */ + ret = i915_vma_sync(old); if (ret) { fence->vma = old; return ret; @@ -255,7 +270,7 @@ static int fence_update(struct i915_fence_reg *fence, old->fence = NULL; } - list_move(&fence->link, &fence->i915->ggtt.fence_list); + list_move(&fence->link, &ggtt->fence_list); } /* @@ -268,7 +283,7 @@ static int fence_update(struct i915_fence_reg *fence, * be cleared before we can use any other fences to ensure that * the new fences do not overlap the elided clears, confusing HW. */ - wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm); + wakeref = intel_runtime_pm_get_if_in_use(uncore->rpm); if (!wakeref) { GEM_BUG_ON(vma); return 0; @@ -279,10 +294,10 @@ static int fence_update(struct i915_fence_reg *fence, if (vma) { vma->fence = fence; - list_move_tail(&fence->link, &fence->i915->ggtt.fence_list); + list_move_tail(&fence->link, &ggtt->fence_list); } - intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref); + intel_runtime_pm_put(uncore->rpm, wakeref); return 0; } @@ -311,11 +326,11 @@ int i915_vma_revoke_fence(struct i915_vma *vma) return fence_update(fence, NULL); } -static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) +static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { struct i915_fence_reg *fence; - list_for_each_entry(fence, &i915->ggtt.fence_list, link) { + list_for_each_entry(fence, &ggtt->fence_list, link) { GEM_BUG_ON(fence->vma && fence->vma->fence != fence); if (atomic_read(&fence->pin_count)) @@ -325,19 +340,21 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) } /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(i915)) + if (intel_has_pending_fb_unpin(ggtt->vm.i915)) return ERR_PTR(-EAGAIN); return ERR_PTR(-EDEADLK); } -static int __i915_vma_pin_fence(struct i915_vma *vma) +int __i915_vma_pin_fence(struct i915_vma *vma) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); struct i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; int err; + lockdep_assert_held(&vma->vm->mutex); + /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; @@ -348,7 +365,7 @@ static int __i915_vma_pin_fence(struct i915_vma *vma) return 0; } } else if (set) { - fence = fence_find(vma->vm->i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -399,7 +416,7 @@ int i915_vma_pin_fence(struct i915_vma *vma) * Note that we revoke fences on runtime suspend. Therefore the user * must keep the device awake whilst using the fence. 
*/ - assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); + assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm); GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!i915_vma_is_ggtt(vma)); @@ -415,14 +432,13 @@ int i915_vma_pin_fence(struct i915_vma *vma) /** * i915_reserve_fence - Reserve a fence for vGPU - * @i915: i915 device private + * @ggtt: Global GTT * * This function walks the fence regs looking for a free one and remove * it from the fence_list. It is used to reserve fence for vGPU to use. */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt) { - struct i915_ggtt *ggtt = &i915->ggtt; struct i915_fence_reg *fence; int count; int ret; @@ -436,7 +452,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) if (count <= 1) return ERR_PTR(-ENOSPC); - fence = fence_find(i915); + fence = fence_find(ggtt); if (IS_ERR(fence)) return fence; @@ -460,7 +476,7 @@ struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915) */ void i915_unreserve_fence(struct i915_fence_reg *fence) { - struct i915_ggtt *ggtt = &fence->i915->ggtt; + struct i915_ggtt *ggtt = fence->ggtt; lockdep_assert_held(&ggtt->vm.mutex); @@ -469,19 +485,19 @@ void i915_unreserve_fence(struct i915_fence_reg *fence) /** * i915_gem_restore_fences - restore fence state - * @i915: i915 device private + * @ggtt: Global GTT * * Restore the hw fence state to match the software tracking again, to be called * after a gpu reset and on resume. Note that on runtime suspend we only cancel * the fences, to be reacquired by the user later. */ -void i915_gem_restore_fences(struct drm_i915_private *i915) +void i915_gem_restore_fences(struct i915_ggtt *ggtt) { int i; rcu_read_lock(); /* keep obj alive as we dereference */ - for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + for (i = 0; i < ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &ggtt->fence_regs[i]; struct i915_vma *vma = READ_ONCE(reg->vma); GEM_BUG_ON(vma && vma->fence != reg); @@ -547,15 +563,16 @@ void i915_gem_restore_fences(struct drm_i915_private *i915) */ /** - * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern - * @i915: i915 device private + * detect_bit_6_swizzle - detect bit 6 swizzling pattern + * @ggtt: Global GGTT * * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. 
*/ -static void detect_bit_6_swizzle(struct drm_i915_private *i915) +static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) { - struct intel_uncore *uncore = &i915->uncore; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; @@ -717,8 +734,8 @@ static void detect_bit_6_swizzle(struct drm_i915_private *i915) swizzle_y = I915_BIT_6_SWIZZLE_NONE; } - i915->mm.bit_6_swizzle_x = swizzle_x; - i915->mm.bit_6_swizzle_y = swizzle_y; + i915->ggtt.bit_6_swizzle_x = swizzle_x; + i915->ggtt.bit_6_swizzle_y = swizzle_y; } /* @@ -819,14 +836,15 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, void i915_ggtt_init_fences(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; + struct intel_uncore *uncore = ggtt->vm.gt->uncore; int num_fences; int i; INIT_LIST_HEAD(&ggtt->fence_list); INIT_LIST_HEAD(&ggtt->userfault_list); - intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm); + intel_wakeref_auto_init(&ggtt->userfault_wakeref, uncore->rpm); - detect_bit_6_swizzle(i915); + detect_bit_6_swizzle(ggtt); if (INTEL_GEN(i915) >= 7 && !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))) @@ -839,20 +857,20 @@ void i915_ggtt_init_fences(struct i915_ggtt *ggtt) num_fences = 8; if (intel_vgpu_active(i915)) - num_fences = intel_uncore_read(&i915->uncore, + num_fences = intel_uncore_read(uncore, vgtif_reg(avail_rs.fence_num)); /* Initialize fence registers to zero */ for (i = 0; i < num_fences; i++) { struct i915_fence_reg *fence = &ggtt->fence_regs[i]; - fence->i915 = i915; + fence->ggtt = ggtt; fence->id = i; list_add_tail(&fence->link, &ggtt->fence_list); } ggtt->num_fences = num_fences; - i915_gem_restore_fences(i915); + i915_gem_restore_fences(ggtt); } void intel_gt_init_swizzling(struct intel_gt *gt) @@ -861,7 +879,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; if (INTEL_GEN(i915) < 5 || - i915->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) + i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING); diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h index 99866fb9d94f..7bd521cd7cd7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -29,7 +29,6 @@ #include <linux/types.h> struct drm_i915_gem_object; -struct drm_i915_private; struct i915_ggtt; struct i915_vma; struct intel_gt; @@ -39,7 +38,7 @@ struct sg_table; struct i915_fence_reg { struct list_head link; - struct drm_i915_private *i915; + struct i915_ggtt *ggtt; struct i915_vma *vma; atomic_t pin_count; int id; @@ -55,10 +54,10 @@ struct i915_fence_reg { }; /* i915_gem_fence_reg.c */ -struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915); +struct i915_fence_reg *i915_reserve_fence(struct i915_ggtt *ggtt); void i915_unreserve_fence(struct i915_fence_reg *fence); -void i915_gem_restore_fences(struct drm_i915_private *i915); +void i915_gem_restore_fences(struct i915_ggtt *ggtt); void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, struct sg_table *pages); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index aa4c85603dcb..3148d5946b63 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -38,6 +38,7 @@ #include 
"display/intel_frontbuffer.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -132,9 +133,15 @@ static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { struct intel_uncore *uncore = ggtt->vm.gt->uncore; + struct drm_i915_private *i915 = ggtt->vm.i915; gen6_ggtt_invalidate(ggtt); - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); + + if (INTEL_GEN(i915) >= 12) + intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR, + GEN12_GUC_TLB_INV_CR_INVALIDATE); + else + intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); } static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) @@ -144,16 +151,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { u32 pte_flags; int err; - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { err = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (err) return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } /* Applicable to VLV, and gen8+ */ @@ -161,14 +170,17 @@ static int ppgtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); + wmb(); return 0; } static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static int ppgtt_set_pages(struct i915_vma *vma) @@ -496,22 +508,26 @@ static void i915_address_space_fini(struct i915_address_space *vm) mutex_destroy(&vm->mutex); } -static void ppgtt_destroy_vma(struct i915_address_space *vm) +void __i915_vm_close(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->bound_list, - &vm->unbound_list, - NULL, - }, **phase; + struct i915_vma *vma, *vn; + + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; - mutex_lock(&vm->i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - struct i915_vma *vma, *vn; + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + i915_vma_destroy(vma); - list_for_each_entry_safe(vma, vn, *phase, vm_link) - i915_vma_destroy(vma); + i915_gem_object_put(obj); } - mutex_unlock(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!list_empty(&vm->bound_list)); + mutex_unlock(&vm->mutex); } static void __i915_vm_release(struct work_struct *work) @@ -519,11 +535,6 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, rcu.work); - ppgtt_destroy_vma(vm); - - GEM_BUG_ON(!list_empty(&vm->bound_list)); - GEM_BUG_ON(!list_empty(&vm->unbound_list)); - vm->cleanup(vm); i915_address_space_fini(vm); @@ -538,7 +549,6 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - vm->closed = true; queue_rcu_work(vm->i915->wq, &vm->rcu); } @@ -546,6 +556,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) { 
kref_init(&vm->ref); INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); /* * The vm->mutex must be reclaim safe (for use in the shrinker). @@ -562,7 +573,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->unbound_list); INIT_LIST_HEAD(&vm->bound_list); } @@ -816,17 +826,6 @@ release_pd_entry(struct i915_page_directory * const pd, return free; } -/* - * PDE TLBs are a pain to invalidate on GEN8+. When we modify - * the page table structures, we mark them dirty so that - * context switching/execlist queuing code takes extra steps - * to ensure that tlbs are flushed. - */ -static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) -{ - ppgtt->pd_dirty_engines = ALL_ENGINES; -} - static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) { struct drm_i915_private *dev_priv = ppgtt->vm.i915; @@ -1367,7 +1366,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (vm->has_read_only && vm->i915->kernel_context && vm->i915->kernel_context->vm) { - struct i915_address_space *clone = vm->i915->kernel_context->vm; + struct i915_address_space *clone = + rcu_dereference_protected(vm->i915->kernel_context->vm, + true); /* static */ GEM_BUG_ON(!clone->has_read_only); @@ -1422,6 +1423,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) set_pd_entry(pd, idx, pde); atomic_inc(px_used(pde)); /* keep pinned */ } + wmb(); return 0; } @@ -1489,8 +1491,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) * * Gen11 has HSDES#:1807136187 unresolved. Disable ro support * for now. + * + * Gen12 has inherited the same read-only fault issue from gen11. */ - ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11; + ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12); /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. 
@@ -1509,13 +1513,12 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) } if (!i915_vm_is_4lvl(&ppgtt->vm)) { - if (intel_vgpu_active(i915)) { - err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) - goto err_free_pd; - } + err = gen8_preallocate_top_level_pdp(ppgtt); + if (err) + goto err_free_pd; } + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; @@ -1566,7 +1569,7 @@ static void gen7_ppgtt_enable(struct intel_gt *gt) } intel_uncore_write(uncore, GAM_ECOCHK, ecochk); - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { /* GFX_MODE is per-ring on gen7+ */ ENGINE_WRITE(engine, RING_MODE_GEN7, @@ -1729,10 +1732,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, } spin_unlock(&pd->lock); - if (flush) { - mark_tlbs_dirty(&ppgtt->base); + if (flush) gen6_ggtt_invalidate(vm->gt->ggtt); - } goto out; @@ -1786,15 +1787,13 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct drm_i915_private *i915 = vm->i915; - /* FIXME remove the struct_mutex to bring the locking under control */ - mutex_lock(&i915->drm.struct_mutex); i915_vma_destroy(ppgtt->vma); - mutex_unlock(&i915->drm.struct_mutex); gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); + + mutex_destroy(&ppgtt->pin_mutex); kfree(ppgtt->base.pd); } @@ -1827,7 +1826,6 @@ static int pd_vma_bind(struct i915_vma *vma, gen6_for_all_pdes(pt, ppgtt->base.pd, pde) gen6_write_pde(ppgtt, pde, pt); - mark_tlbs_dirty(&ppgtt->base); gen6_ggtt_invalidate(ggtt); return 0; @@ -1866,7 +1864,6 @@ static const struct i915_vma_ops pd_vma_ops = { static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) { - struct drm_i915_private *i915 = ppgtt->base.vm.i915; struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; struct i915_vma *vma; @@ -1877,33 +1874,30 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - i915_active_init(i915, &vma->active, NULL, NULL); + i915_active_init(&vma->active, NULL, NULL); - vma->vm = &ggtt->vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); vma->ops = &pd_vma_ops; vma->private = ppgtt; vma->size = size; vma->fence_size = size; - vma->flags = I915_VMA_GGTT; + atomic_set(&vma->flags, I915_VMA_GGTT); vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */ INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->closed_link); - mutex_lock(&vma->vm->mutex); - list_add(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); - return vma; } int gen6_ppgtt_pin(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - int err; + int err = 0; - GEM_BUG_ON(ppgtt->base.vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); /* * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt @@ -1911,24 +1905,26 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) * (When vma->pin_count becomes atomic, I expect we will naturally * need a larger, unpacked, type and kill this redundancy.) */ - if (ppgtt->pin_count++) + if (atomic_add_unless(&ppgtt->pin_count, 1, 0)) return 0; + if (mutex_lock_interruptible(&ppgtt->pin_mutex)) + return -EINTR; + /* * PPGTT PDEs reside in the GGTT and consists of 512 entries. 
The * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - err = i915_vma_pin(ppgtt->vma, - 0, GEN6_PD_ALIGN, - PIN_GLOBAL | PIN_HIGH); - if (err) - goto unpin; - - return 0; + if (!atomic_read(&ppgtt->pin_count)) { + err = i915_vma_pin(ppgtt->vma, + 0, GEN6_PD_ALIGN, + PIN_GLOBAL | PIN_HIGH); + } + if (!err) + atomic_inc(&ppgtt->pin_count); + mutex_unlock(&ppgtt->pin_mutex); -unpin: - ppgtt->pin_count = 0; return err; } @@ -1936,22 +1932,20 @@ void gen6_ppgtt_unpin(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - GEM_BUG_ON(!ppgtt->pin_count); - if (--ppgtt->pin_count) - return; - - i915_vma_unpin(ppgtt->vma); + GEM_BUG_ON(!atomic_read(&ppgtt->pin_count)); + if (atomic_dec_and_test(&ppgtt->pin_count)) + i915_vma_unpin(ppgtt->vma); } void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); - if (!ppgtt->pin_count) + if (!atomic_read(&ppgtt->pin_count)) return; - ppgtt->pin_count = 0; i915_vma_unpin(ppgtt->vma); + atomic_set(&ppgtt->pin_count, 0); } static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) @@ -1964,9 +1958,12 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) if (!ppgtt) return ERR_PTR(-ENOMEM); + mutex_init(&ppgtt->pin_mutex); + ppgtt_init(&ppgtt->base, &i915->gt); ppgtt->base.vm.top = 1; + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; @@ -2023,7 +2020,7 @@ static void gtt_write_workarounds(struct intel_gt *gt) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); - else if (INTEL_GEN(i915) >= 9) + else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) intel_uncore_write(uncore, GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); @@ -2202,7 +2199,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE; - for_each_sgt_dma(addr, sgt_iter, vma->pages) + for_each_sgt_daddr(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); /* @@ -2243,7 +2240,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE; struct sgt_iter iter; dma_addr_t addr; - for_each_sgt_dma(addr, iter, vma->pages) + for_each_sgt_daddr(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); /* @@ -2448,7 +2445,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally * upgrade to both bound if we bind either to avoid double-binding. 
*/ - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); return 0; } @@ -2478,14 +2475,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; - if (!(vma->flags & I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { ret = alias->vm.allocate_va_range(&alias->vm, vma->node.start, vma->size); if (ret) return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); alias->vm.insert_entries(&alias->vm, vma, cache_level, pte_flags); } @@ -2506,7 +2507,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - if (vma->flags & I915_VMA_GLOBAL_BIND) { + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { struct i915_address_space *vm = vma->vm; intel_wakeref_t wakeref; @@ -2514,7 +2515,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } - if (vma->flags & I915_VMA_LOCAL_BIND) { + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { struct i915_address_space *vm = &i915_vm_to_ggtt(vma->vm)->alias->vm; @@ -2530,7 +2531,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { + /* XXX This does not prevent more requests being submitted! */ + if (intel_gt_retire_requests_timeout(ggtt->vm.gt, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); @@ -2555,12 +2558,12 @@ static int ggtt_set_pages(struct i915_vma *vma) return 0; } -static void i915_gtt_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, - u64 *end) +static void i915_ggtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) { - if (drm_mm_node_allocated(node) && node->color != color) + if (i915_node_color_differs(node, color)) *start += I915_GTT_PAGE_SIZE; /* Also leave a space between the unallocated reserved node after the @@ -2598,6 +2601,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; @@ -2605,6 +2609,8 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; + ppgtt->vm.total = ggtt->vm.total; + return 0; err_ppgtt: @@ -2614,22 +2620,16 @@ err_ppgtt: static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_ppgtt *ppgtt; - mutex_lock(&i915->drm.struct_mutex); - ppgtt = fetch_and_zero(&ggtt->alias); if (!ppgtt) - goto out; + return; i915_vm_put(&ppgtt->vm); ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - -out: - mutex_unlock(&i915->drm.struct_mutex); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -2744,34 +2744,83 @@ int i915_init_ggtt(struct drm_i915_private *i915) return 0; } +void i915_gem_cleanup_memory_regions(struct drm_i915_private *i915) +{ + int i; + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) 
{ + struct intel_memory_region *region = i915->mm.regions[i]; + + if (region) + intel_memory_region_put(region); + } +} + +int i915_gem_init_memory_regions(struct drm_i915_private *i915) +{ + int err, i; + + for (i = 0; i < INTEL_REGION_UNKNOWN; i++) { + struct intel_memory_region *mem = ERR_PTR(-ENODEV); + u32 type; + + if (!HAS_REGION(i915, BIT(i))) + continue; + + type = MEMORY_TYPE_FROM_REGION(intel_region_map[i]); + switch (type) { + case INTEL_MEMORY_SYSTEM: + mem = i915_gem_shmem_setup(i915); + break; + case INTEL_MEMORY_STOLEN: + mem = i915_gem_stolen_setup(i915); + break; + } + + if (IS_ERR(mem)) { + err = PTR_ERR(mem); + DRM_ERROR("Failed to setup region(%d) type=%d\n", err, type); + goto out_cleanup; + } + + mem->id = intel_region_map[i]; + mem->type = type; + mem->instance = MEMORY_INSTANCE_FROM_REGION(intel_region_map[i]); + + i915->mm.regions[i] = mem; + } + + return 0; + +out_cleanup: + i915_gem_cleanup_memory_regions(i915); + return err; +} + static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_vma *vma, *vn; - ggtt->vm.closed = true; + atomic_set(&ggtt->vm.open, 0); rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ - flush_workqueue(i915->wq); + flush_workqueue(ggtt->vm.i915->wq); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&ggtt->vm.mutex); list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) - WARN_ON(i915_vma_unbind(vma)); + WARN_ON(__i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); ggtt_release_guc_top(ggtt); - - if (drm_mm_initialized(&ggtt->vm.mm)) { - intel_vgt_deballoon(ggtt); - i915_address_space_fini(&ggtt->vm); - } + intel_vgt_deballoon(ggtt); ggtt->vm.cleanup(&ggtt->vm); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->iomap); @@ -2785,6 +2834,8 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) { struct pagevec *pvec; + i915_gem_cleanup_memory_regions(i915); + fini_aliasing_ppgtt(&i915->ggtt); ggtt_cleanup_hw(&i915->ggtt); @@ -2794,8 +2845,6 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915) set_pages_array_wb(pvec->pages, pvec->nr); __pagevec_release(pvec); } - - i915_gem_cleanup_stolen(i915); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -3200,9 +3249,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) static int ggtt_init_hw(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; - int ret = 0; - - mutex_lock(&i915->drm.struct_mutex); i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); @@ -3212,24 +3258,20 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) - ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; + ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; if (!io_mapping_init_wc(&ggtt->iomap, ggtt->gmadr.start, ggtt->mappable_end)) { ggtt->vm.cleanup(&ggtt->vm); - ret = -EIO; - goto out; + return -EIO; } ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); i915_ggtt_init_fences(ggtt); -out: - mutex_unlock(&i915->drm.struct_mutex); - - return ret; + return 0; } /** @@ -3251,11 +3293,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* - * Initialise stolen early so that we may reserve preallocated - * objects for the BIOS to KMS transition. 
- */ - ret = i915_gem_init_stolen(dev_priv); + ret = i915_gem_init_memory_regions(dev_priv); if (ret) goto out_gtt_cleanup; @@ -3301,6 +3339,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; bool flush = false; + int open; intel_gt_check_and_clear_faults(ggtt->vm.gt); @@ -3308,33 +3347,31 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; - mutex_unlock(&ggtt->vm.mutex); - - if (!i915_vma_unbind(vma)) - goto lock; + if (!__i915_vma_unbind(vma)) + continue; + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); WARN_ON(i915_vma_bind(vma, obj ? obj->cache_level : 0, - PIN_UPDATE)); + PIN_GLOBAL, NULL)); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; } - -lock: - mutex_lock(&ggtt->vm.mutex); } - ggtt->vm.closed = false; + atomic_set(&ggtt->vm.open, open); ggtt->invalidate(ggtt); mutex_unlock(&ggtt->vm.mutex); @@ -3726,7 +3763,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 offset; int err; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); + GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(alignment && !is_power_of_2(alignment)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index b97a47fc7a68..f074f1de66e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -148,8 +148,8 @@ typedef u64 gen8_pte_t; #define GEN8_PDE_IPS_64K BIT(11) #define GEN8_PDE_PS_2M BIT(7) -#define for_each_sgt_dma(__dmap, __iter, __sgt) \ - __for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE) +#define for_each_sgt_daddr(__dp, __iter, __sgt) \ + __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) struct intel_remapped_plane_info { /* in gtt pages */ @@ -305,7 +305,16 @@ struct i915_address_space { u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ - bool closed; + unsigned int bind_async_flags; + + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. + */ + atomic_t open; struct mutex mutex; /* protects vma and our lists */ #define VM_CLASS_GGTT 0 @@ -320,11 +329,6 @@ struct i915_address_space { */ struct list_head bound_list; - /** - * List of vma that are not unbound. 
- */ - struct list_head unbound_list; - struct pagestash free_pages; /* Global GTT */ @@ -376,6 +380,12 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K); } +static inline bool +i915_vm_has_cache_coloring(struct i915_address_space *vm) +{ + return i915_is_ggtt(vm) && vm->mm.color_adjust; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -401,6 +411,11 @@ struct i915_ggtt { int mtrr; + /** Bit 6 swizzling required for X tiling */ + u32 bit_6_swizzle_x; + /** Bit 6 swizzling required for Y tiling */ + u32 bit_6_swizzle_y; + u32 pin_bias; unsigned int num_fences; @@ -422,7 +437,6 @@ struct i915_ggtt { struct i915_ppgtt { struct i915_address_space vm; - intel_engine_mask_t pd_dirty_engines; struct i915_page_directory *pd; }; @@ -432,7 +446,9 @@ struct gen6_ppgtt { struct i915_vma *vma; gen6_pte_t __iomem *pd_addr; - unsigned int pin_count; + atomic_t pin_count; + struct mutex pin_mutex; + bool scan_for_unused_pt; }; @@ -577,6 +593,35 @@ static inline void i915_vm_put(struct i915_address_space *vm) kref_put(&vm->ref, i915_vm_release); } +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + int gen6_ppgtt_pin(struct i915_ppgtt *base); void gen6_ppgtt_unpin(struct i915_ppgtt *base); void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); @@ -609,10 +654,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(11) +#define PIN_UPDATE BIT_ULL(9) +#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 5d9101376a3d..ad33fbe90a28 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -5,6 +5,7 @@ #include "gt/intel_engine_user.h" #include "i915_drv.h" +#include "i915_perf.h" int i915_getparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -79,8 +80,8 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, break; case I915_PARAM_HAS_GPU_RESET: value = i915_modparams.enable_hangcheck && - intel_has_gpu_reset(i915); - if (value && intel_has_reset_engine(i915)) + intel_has_gpu_reset(&i915->gt); + if (value && intel_has_reset_engine(&i915->gt)) value = 2; break; case I915_PARAM_HAS_RESOURCE_STREAMER: @@ -156,6 +157,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; break; + case 
I915_PARAM_PERF_REVISION: + value = i915_perf_ioctl_version(); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e284bd76fa86..5cf4eed5add8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -421,6 +421,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct drm_i915_error_engine *ee) { + const struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu; int slice; int subslice; @@ -436,12 +437,12 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (INTEL_GEN(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.sampler[slice][subslice]); - for_each_instdone_slice_subslice(m->i915, slice, subslice) + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.row[slice][subslice]); @@ -470,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n", - header, ctx->comm, ctx->pid, ctx->hw_id, - ctx->sched_attr.priority, ctx->guilty, ctx->active); + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, + ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -574,6 +575,9 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, lower_32_bits(obj->gtt_offset)); } + if (obj->gtt_page_sizes > I915_GTT_PAGE_SIZE_4K) + err_printf(m, "gtt_page_sizes = 0x%08x\n", obj->gtt_page_sizes); + err_compression_marker(m); for (page = 0; page < obj->page_count; page++) { int i, len; @@ -734,6 +738,9 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (IS_GEN(m->i915, 7)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); + if (IS_GEN_RANGE(m->i915, 8, 11)) + err_printf(m, "GTT_CACHE_EN: 0x%08x\n", error->gtt_cache); + for (ee = error->engine; ee; ee = ee->next) error_print_engine(m, ee, error->epoch); @@ -984,12 +991,13 @@ i915_error_object_create(struct drm_i915_private *i915, dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->gtt_page_sizes = vma->page_sizes.gtt; dst->num_pages = num_pages; dst->page_count = 0; dst->unused = 0; ret = -EINVAL; - for_each_sgt_dma(dma, iter, vma->pages) { + for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); @@ -1263,7 +1271,6 @@ static bool record_context(struct drm_i915_error_context *e, rcu_read_unlock(); } - e->hw_id = ctx->hw_id; e->sched_attr = ctx->sched; e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); @@ -1291,7 +1298,7 @@ capture_vma(struct capture_vma *next, if (!c) return next; - if (!i915_active_trygrab(&vma->active)) { + if (!i915_active_acquire_if_busy(&vma->active)) { kfree(c); return next; } @@ -1431,7 +1438,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress) *this->slot = i915_error_object_create(i915, vma, compress); - i915_active_ungrab(&vma->active); + 
i915_active_release(&vma->active); i915_vma_put(vma); capture = this->next; @@ -1553,6 +1560,9 @@ static void capture_reg_state(struct i915_gpu_state *error) error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS); } + if (IS_GEN_RANGE(i915, 8, 11)) + error->gtt_cache = intel_uncore_read(uncore, HSW_GTT_CACHE_EN); + /* 4: Everything else */ if (INTEL_GEN(i915) >= 11) { error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index df9f57766626..7f1cd0b1fef7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -74,6 +74,7 @@ struct i915_gpu_state { u32 gam_ecochk; u32 gab_ctl; u32 gfx_mode; + u32 gtt_cache; u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; @@ -118,7 +119,6 @@ struct i915_gpu_state { struct drm_i915_error_context { char comm[TASK_COMM_LEN]; pid_t pid; - u32 hw_id; int active; int guilty; struct i915_sched_attr sched_attr; @@ -127,6 +127,7 @@ struct i915_gpu_state { struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; + u32 gtt_page_sizes; int num_pages; int page_count; int unused; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 37e3dd3c1a9d..572a5c37cc61 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -29,7 +29,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/circ_buf.h> -#include <linux/cpuidle.h> #include <linux/slab.h> #include <linux/sysrq.h> @@ -149,30 +148,24 @@ static const u32 hpd_gen12[HPD_NUM_PINS] = { }; static const u32 hpd_icp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP, - [HPD_PORT_D] = SDE_TC2_HOTPLUG_ICP, - [HPD_PORT_E] = SDE_TC3_HOTPLUG_ICP, - [HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP -}; - -static const u32 hpd_mcc[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC4), }; static const u32 hpd_tgp[HPD_NUM_PINS] = { - [HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP, - [HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP, - [HPD_PORT_C] = SDE_DDIC_HOTPLUG_TGP, - [HPD_PORT_D] = SDE_TC1_HOTPLUG_ICP, - [HPD_PORT_E] = SDE_TC2_HOTPLUG_ICP, - [HPD_PORT_F] = SDE_TC3_HOTPLUG_ICP, - [HPD_PORT_G] = SDE_TC4_HOTPLUG_ICP, - [HPD_PORT_H] = SDE_TC5_HOTPLUG_TGP, - [HPD_PORT_I] = SDE_TC6_HOTPLUG_TGP, + [HPD_PORT_A] = SDE_DDI_HOTPLUG_ICP(PORT_A), + [HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(PORT_B), + [HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(PORT_C), + [HPD_PORT_D] = SDE_TC_HOTPLUG_ICP(PORT_TC1), + [HPD_PORT_E] = SDE_TC_HOTPLUG_ICP(PORT_TC2), + [HPD_PORT_F] = SDE_TC_HOTPLUG_ICP(PORT_TC3), + [HPD_PORT_G] = SDE_TC_HOTPLUG_ICP(PORT_TC4), + [HPD_PORT_H] = SDE_TC_HOTPLUG_ICP(PORT_TC5), + [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr, @@ -419,7 +412,7 @@ void gen9_reset_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(&gt->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(&gt->irq_lock); gen6_gt_pm_reset_iir(gt, gt->pm_guc_events); @@ -430,7 +423,7 @@ void gen9_enable_guc_interrupts(struct intel_guc *guc) { struct intel_gt
*gt = guc_to_gt(guc); - assert_rpm_wakelock_held(&gt->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(&gt->irq_lock); if (!guc->interrupts.enabled) { @@ -447,7 +440,7 @@ void gen9_disable_guc_interrupts(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); - assert_rpm_wakelock_held(&gt->i915->runtime_pm); + assert_rpm_wakelock_held(gt->uncore->rpm); spin_lock_irq(&gt->irq_lock); guc->interrupts.enabled = false; @@ -942,14 +935,14 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) return (position + crtc->scanline_offset) % vtotal; } -bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int index, bool in_vblank_irq, int *vpos, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, - pipe); + struct intel_crtc *crtc = to_intel_crtc(drm_crtc_from_index(dev, index)); + enum pipe pipe = crtc->pipe; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; unsigned long irqflags; @@ -992,7 +985,7 @@ bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, /* No obvious pixelcount register. Only query vertical * scanout position from Display scan line register. */ - position = __intel_get_crtc_scanline(intel_crtc); + position = __intel_get_crtc_scanline(crtc); } else { /* Have access to pixelcount since start of frame. * We can split this into vertical and horizontal @@ -1401,11 +1394,11 @@ static bool icp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { case HPD_PORT_A: - return val & ICP_DDIA_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_A); case HPD_PORT_B: - return val & ICP_DDIB_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_B); case HPD_PORT_C: - return val & TGP_DDIC_HPD_LONG_DETECT; + return val & SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(PORT_C); default: return false; } @@ -1427,20 +1420,6 @@ static bool icp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { } } -static bool tgp_ddi_port_hotplug_long_detect(enum hpd_pin pin, u32 val) -{ - switch (pin) { - case HPD_PORT_A: - return val & ICP_DDIA_HPD_LONG_DETECT; - case HPD_PORT_B: - return val & ICP_DDIB_HPD_LONG_DETECT; - case HPD_PORT_C: - return val & TGP_DDIC_HPD_LONG_DETECT; - default: - return false; - } -} - static bool tgp_tc_port_hotplug_long_detect(enum hpd_pin pin, u32 val) { switch (pin) { @@ -1716,7 +1695,7 @@ static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { - int pipe; + enum pipe pipe; spin_lock(&dev_priv->irq_lock); @@ -1741,6 +1720,7 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, status_mask = PIPE_FIFO_UNDERRUN_STATUS; switch (pipe) { + default: case PIPE_A: iir_bit = I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; break; @@ -2136,7 +2116,7 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx); @@ -2222,7 +2202,7 @@ static void cpt_serr_int_handler(struct drm_i915_private *dev_priv) static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - int pipe; + enum pipe pipe;
u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt); @@ -2256,19 +2236,35 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) cpt_serr_int_handler(dev_priv); } -static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, - const u32 *pins) +static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) { - u32 ddi_hotplug_trigger; - u32 tc_hotplug_trigger; + u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; + bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); + const u32 *pins; - if (HAS_PCH_MCC(dev_priv)) { + if (HAS_PCH_TGP(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; + tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; + tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; + pins = hpd_tgp; + } else if (HAS_PCH_JSP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; + pins = hpd_tgp; + } else if (HAS_PCH_MCC(dev_priv)) { + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; + tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; + pins = hpd_icp; } else { + WARN(!HAS_PCH_ICP(dev_priv), + "Unrecognized PCH type 0x%x\n", INTEL_PCH_TYPE(dev_priv)); + ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; + tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; + pins = hpd_icp; } if (ddi_hotplug_trigger) { @@ -2292,44 +2288,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir, intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, tc_hotplug_trigger, dig_hotplug_reg, pins, - icp_tc_port_hotplug_long_detect); - } - - if (pin_mask) - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - - if (pch_iir & SDE_GMBUS_ICP) - gmbus_irq_handler(dev_priv); -} - -static void tgp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) -{ - u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; - u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; - u32 pin_mask = 0, long_mask = 0; - - if (ddi_hotplug_trigger) { - u32 dig_hotplug_reg; - - dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_DDI); - I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg); - - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - ddi_hotplug_trigger, - dig_hotplug_reg, hpd_tgp, - tgp_ddi_port_hotplug_long_detect); - } - - if (tc_hotplug_trigger) { - u32 dig_hotplug_reg; - - dig_hotplug_reg = I915_READ(SHOTPLUG_CTL_TC); - I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg); - - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - tc_hotplug_trigger, - dig_hotplug_reg, hpd_tgp, - tgp_tc_port_hotplug_long_detect); + tc_port_hotplug_long_detect); } if (pin_mask) @@ -2655,11 +2614,21 @@ gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) } if (iir & GEN8_DE_EDP_PSR) { - u32 psr_iir = I915_READ(EDP_PSR_IIR); + u32 psr_iir; + i915_reg_t iir_reg; + + if (INTEL_GEN(dev_priv) >= 12) + iir_reg = TRANS_PSR_IIR(dev_priv->psr.transcoder); + else + iir_reg = EDP_PSR_IIR; + + psr_iir = I915_READ(iir_reg); + I915_WRITE(iir_reg, psr_iir); + + if (psr_iir) + found = true; intel_psr_irq_handler(dev_priv, psr_iir); - I915_WRITE(EDP_PSR_IIR, psr_iir); - found = true; } if (!found) @@ -2780,12 +2749,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) I915_WRITE(SDEIIR, iir); ret = IRQ_HANDLED; - if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - tgp_irq_handler(dev_priv, iir); - 
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_MCC) - icp_irq_handler(dev_priv, iir, hpd_mcc); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_irq_handler(dev_priv, iir, hpd_icp); + if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) + icp_irq_handler(dev_priv, iir); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) spt_irq_handler(dev_priv, iir); else @@ -2952,12 +2917,18 @@ int i8xx_enable_vblank(struct drm_crtc *crtc) return 0; } -int i945gm_enable_vblank(struct drm_crtc *crtc) +int i915gm_enable_vblank(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); - if (dev_priv->i945gm_vblank.enabled++ == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + /* + * Vblank interrupts fail to wake the device up from C2+. + * Disabling render clock gating during C-states avoids + * the problem. There is a small power cost so we do this + * only when vblank interrupts are actually enabled. + */ + if (dev_priv->vblank_enabled++ == 0) + I915_WRITE(SCPD0, _MASKED_BIT_ENABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); return i8xx_enable_vblank(crtc); } @@ -3030,14 +3001,14 @@ void i8xx_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -void i945gm_disable_vblank(struct drm_crtc *crtc) +void i915gm_disable_vblank(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->dev); i8xx_disable_vblank(crtc); - if (--dev_priv->i945gm_vblank.enabled == 0) - schedule_work(&dev_priv->i945gm_vblank.work); + if (--dev_priv->vblank_enabled == 0) + I915_WRITE(SCPD0, _MASKED_BIT_DISABLE(CSTATE_RENDER_CLOCK_GATE_DISABLE)); } void i965_disable_vblank(struct drm_crtc *crtc) @@ -3076,60 +3047,6 @@ void bdw_disable_vblank(struct drm_crtc *crtc) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static void i945gm_vblank_work_func(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, i945gm_vblank.work); - - /* - * Vblank interrupts fail to wake up the device from C3, - * hence we want to prevent C3 usage while vblank interrupts - * are enabled. - */ - pm_qos_update_request(&dev_priv->i945gm_vblank.pm_qos, - READ_ONCE(dev_priv->i945gm_vblank.enabled) ? - dev_priv->i945gm_vblank.c3_disable_latency : - PM_QOS_DEFAULT_VALUE); -} - -static int cstate_disable_latency(const char *name) -{ - const struct cpuidle_driver *drv; - int i; - - drv = cpuidle_get_driver(); - if (!drv) - return 0; - - for (i = 0; i < drv->state_count; i++) { - const struct cpuidle_state *state = &drv->states[i]; - - if (!strcmp(state->name, name)) - return state->exit_latency ? 
- state->exit_latency - 1 : 0; - } - - return 0; -} - -static void i945gm_vblank_work_init(struct drm_i915_private *dev_priv) -{ - INIT_WORK(&dev_priv->i945gm_vblank.work, - i945gm_vblank_work_func); - - dev_priv->i945gm_vblank.c3_disable_latency = - cstate_disable_latency("C3"); - pm_qos_add_request(&dev_priv->i945gm_vblank.pm_qos, - PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); -} - -static void i945gm_vblank_work_fini(struct drm_i915_private *dev_priv) -{ - cancel_work_sync(&dev_priv->i945gm_vblank.work); - pm_qos_remove_request(&dev_priv->i945gm_vblank.pm_qos); -} - static void ibx_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3246,7 +3163,7 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) static void gen8_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; gen8_master_intr_disable(dev_priv->uncore.regs); @@ -3271,7 +3188,7 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) static void gen11_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; - int pipe; + enum pipe pipe; gen11_master_intr_disable(dev_priv->uncore.regs); @@ -3279,8 +3196,23 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv) intel_uncore_write(uncore, GEN11_DISPLAY_INT_CTL, 0); - intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); - intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; + + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; + + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; + + intel_uncore_write(uncore, TRANS_PSR_IMR(trans), 0xffffffff); + intel_uncore_write(uncore, TRANS_PSR_IIR(trans), 0xffffffff); + } + } else { + intel_uncore_write(uncore, EDP_PSR_IMR, 0xffffffff); + intel_uncore_write(uncore, EDP_PSR_IIR, 0xffffffff); + } for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, @@ -3431,42 +3363,44 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, } } -static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, + u32 sde_ddi_mask, u32 sde_tc_mask, + u32 ddi_enable_mask, u32 tc_enable_mask, + const u32 *pins) { u32 hotplug_irqs, enabled_irqs; - hotplug_irqs = SDE_DDI_MASK_ICP | SDE_TC_MASK_ICP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_icp); + hotplug_irqs = sde_ddi_mask | sde_tc_mask; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK); + icp_hpd_detection_setup(dev_priv, ddi_enable_mask, tc_enable_mask); } +/* + * EHL doesn't need most of gen11_hpd_irq_setup, it's handling only the + * equivalent of SDE. 
+ */ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, enabled_irqs; - - hotplug_irqs = SDE_DDI_MASK_TGP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_mcc); - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), + hpd_icp); } -static void tgp_hpd_irq_setup(struct drm_i915_private *dev_priv) +/* + * JSP behaves exactly the same as MCC above except that port C is mapped to + * the DDI-C pins instead of the TC1 pins. This means we should follow TGP's + * masks & tables rather than ICP's masks & tables. + */ +static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, enabled_irqs; - - hotplug_irqs = SDE_DDI_MASK_TGP | SDE_TC_MASK_TGP; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_tgp); - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - - icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK); + icp_hpd_irq_setup(dev_priv, + SDE_DDI_MASK_TGP, 0, + TGP_DDI_HPD_ENABLE_MASK, 0, + hpd_tgp); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3506,9 +3440,13 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) gen11_hpd_detection_setup(dev_priv); if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) - tgp_hpd_irq_setup(dev_priv); + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, + TGP_DDI_HPD_ENABLE_MASK, + TGP_TC_HPD_ENABLE_MASK, hpd_tgp); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - icp_hpd_irq_setup(dev_priv); + icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, + ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE_MASK, hpd_icp); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3684,7 +3622,6 @@ static void ironlake_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_HASWELL(dev_priv)) { gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); display_mask |= DE_EDP_PSR_INT_HSW; } @@ -3794,8 +3731,21 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; - gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); - intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + if (INTEL_GEN(dev_priv) >= 12) { + enum transcoder trans; + + for (trans = TRANSCODER_A; trans <= TRANSCODER_D; trans++) { + enum intel_display_power_domain domain; + + domain = POWER_DOMAIN_TRANSCODER(trans); + if (!intel_display_power_is_enabled(dev_priv, domain)) + continue; + + gen3_assert_iir_is_zero(uncore, TRANS_PSR_IIR(trans)); + } + } else { + gen3_assert_iir_is_zero(uncore, EDP_PSR_IIR); + } for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; @@ -3853,8 +3803,11 @@ static void icp_irq_postinstall(struct drm_i915_private *dev_priv) if (HAS_PCH_TGP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); - else if (HAS_PCH_MCC(dev_priv)) + else if (HAS_PCH_JSP(dev_priv)) icp_hpd_detection_setup(dev_priv, TGP_DDI_HPD_ENABLE_MASK, 0); + else if (HAS_PCH_MCC(dev_priv)) + icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, + ICP_TC_HPD_ENABLE(PORT_TC1)); else icp_hpd_detection_setup(dev_priv, ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); @@ -4320,9 +4273,6 @@ void intel_irq_init(struct 
drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; int i; - if (IS_I945GM(dev_priv)) - i945gm_vblank_work_init(dev_priv); - intel_hpd_init_work(dev_priv); INIT_WORK(&rps->work, gen6_pm_rps_work); @@ -4387,8 +4337,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (I915_HAS_HOTPLUG(dev_priv)) dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; } else { - if (HAS_PCH_MCC(dev_priv)) - /* EHL doesn't need most of gen11_hpd_irq_setup */ + if (HAS_PCH_JSP(dev_priv)) + dev_priv->display.hpd_irq_setup = jsp_hpd_irq_setup; + else if (HAS_PCH_MCC(dev_priv)) dev_priv->display.hpd_irq_setup = mcc_hpd_irq_setup; else if (INTEL_GEN(dev_priv) >= 11) dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup; @@ -4411,9 +4362,6 @@ void intel_irq_fini(struct drm_i915_private *i915) { int i; - if (IS_I945GM(i915)) - i945gm_vblank_work_fini(i915); - for (i = 0; i < MAX_L3_SLICES; ++i) kfree(i915->l3_parity.remap_info[i]); } @@ -4538,10 +4486,10 @@ void intel_irq_uninstall(struct drm_i915_private *dev_priv) int irq = dev_priv->drm.pdev->irq; /* - * FIXME we can get called twice during driver load - * error handling due to intel_modeset_cleanup() - * calling us out of sequence. Would be nice if - * it didn't do that... + * FIXME we can get called twice during driver probe + * error handling as well as during driver remove due to + * intel_modeset_driver_remove() calling us out of sequence. + * Would be nice if it didn't do that... */ if (!dev_priv->drm.irq_enabled) return; diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h index 8e7e6071777e..19a3bc019535 100644 --- a/drivers/gpu/drm/i915/i915_irq.h +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -122,12 +122,12 @@ u32 i915_get_vblank_counter(struct drm_crtc *crtc); u32 g4x_get_vblank_counter(struct drm_crtc *crtc); int i8xx_enable_vblank(struct drm_crtc *crtc); -int i945gm_enable_vblank(struct drm_crtc *crtc); +int i915gm_enable_vblank(struct drm_crtc *crtc); int i965_enable_vblank(struct drm_crtc *crtc); int ilk_enable_vblank(struct drm_crtc *crtc); int bdw_enable_vblank(struct drm_crtc *crtc); void i8xx_disable_vblank(struct drm_crtc *crtc); -void i945gm_disable_vblank(struct drm_crtc *crtc); +void i915gm_disable_vblank(struct drm_crtc *crtc); void i965_disable_vblank(struct drm_crtc *crtc); void ilk_disable_vblank(struct drm_crtc *crtc); void bdw_disable_vblank(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 296452f9efe4..4f1806f65040 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -46,7 +46,8 @@ i915_param_named(modeset, int, 0400, i915_param_named_unsafe(enable_dc, int, 0400, "Enable power-saving display C-states. 
" - "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)"); + "(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6; " + "3=up to DC5 with DC3CO; 4=up to DC6 with DC3CO)"); i915_param_named_unsafe(enable_fbc, int, 0600, "Enable frame buffer compression for power savings " diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 1974e4c78a43..f9a3bfe68689 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -23,7 +23,6 @@ */ #include <linux/console.h> -#include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <drm/drm_drv.h> @@ -118,6 +117,14 @@ [PIPE_C] = IVB_CURSOR_C_OFFSET, \ } +#define TGL_CURSOR_OFFSETS \ + .cursor_offsets = { \ + [PIPE_A] = CURSOR_A_OFFSET, \ + [PIPE_B] = IVB_CURSOR_B_OFFSET, \ + [PIPE_C] = IVB_CURSOR_C_OFFSET, \ + [PIPE_D] = TGL_CURSOR_D_OFFSET, \ + } + #define I9XX_COLORS \ .color = { .gamma_lut_size = 256 } #define I965_COLORS \ @@ -144,10 +151,13 @@ #define GEN_DEFAULT_PAGE_SIZES \ .page_sizes = I915_GTT_PAGE_SIZE_4K +#define GEN_DEFAULT_REGIONS \ + .memory_regions = REGION_SMEM | REGION_STOLEN + #define I830_FEATURES \ GEN(2), \ .is_mobile = 1, \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_overlay = 1, \ .display.cursor_needs_physical = 1, \ .display.overlay_needs_physical = 1, \ @@ -161,11 +171,12 @@ I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define I845_FEATURES \ GEN(2), \ - .num_pipes = 1, \ + .pipe_mask = BIT(PIPE_A), \ .display.has_overlay = 1, \ .display.overlay_needs_physical = 1, \ .display.has_gmch = 1, \ @@ -178,7 +189,8 @@ I845_PIPE_OFFSETS, \ I845_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i830_info = { I830_FEATURES, @@ -203,7 +215,7 @@ static const struct intel_device_info intel_i865g_info = { #define GEN3_FEATURES \ GEN(3), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ .engine_mask = BIT(RCS0), \ @@ -212,7 +224,8 @@ static const struct intel_device_info intel_i865g_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I9XX_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i915g_info = { GEN3_FEATURES, @@ -287,7 +300,7 @@ static const struct intel_device_info intel_pineview_m_info = { #define GEN4_FEATURES \ GEN(4), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_gmch = 1, \ .gpu_reset_clobbers_display = true, \ @@ -297,7 +310,8 @@ static const struct intel_device_info intel_pineview_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ I965_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_i965g_info = { GEN4_FEATURES, @@ -337,7 +351,7 @@ static const struct intel_device_info intel_gm45_info = { #define GEN5_FEATURES \ GEN(5), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0), \ .has_snoop = true, \ @@ -347,7 +361,8 @@ static const struct intel_device_info intel_gm45_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS static const struct intel_device_info intel_ironlake_d_info 
= { GEN5_FEATURES, @@ -363,7 +378,7 @@ static const struct intel_device_info intel_ironlake_m_info = { #define GEN6_FEATURES \ GEN(6), \ - .num_pipes = 2, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -377,7 +392,8 @@ static const struct intel_device_info intel_ironlake_m_info = { I9XX_PIPE_OFFSETS, \ I9XX_CURSOR_OFFSETS, \ ILK_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define SNB_D_PLATFORM \ GEN6_FEATURES, \ @@ -411,7 +427,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { #define GEN7_FEATURES \ GEN(7), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .display.has_hotplug = 1, \ .display.has_fbc = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0), \ @@ -420,12 +436,13 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_rc6 = 1, \ .has_rc6p = 1, \ .has_rps = true, \ - .ppgtt_type = INTEL_PPGTT_FULL, \ + .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN_DEFAULT_PAGE_SIZES + GEN_DEFAULT_PAGE_SIZES, \ + GEN_DEFAULT_REGIONS #define IVB_D_PLATFORM \ GEN7_FEATURES, \ @@ -462,7 +479,7 @@ static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, PLATFORM(INTEL_IVYBRIDGE), .gt = 2, - .num_pipes = 0, /* legal, last one wins */ + .pipe_mask = 0, /* legal, last one wins */ .has_l3_dpf = 1, }; @@ -470,13 +487,13 @@ static const struct intel_device_info intel_valleyview_info = { PLATFORM(INTEL_VALLEYVIEW), GEN(7), .is_lp = 1, - .num_pipes = 2, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B), .has_runtime_pm = 1, .has_rc6 = 1, .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 31, .has_snoop = true, .has_coherent_ggtt = false, @@ -486,6 +503,7 @@ static const struct intel_device_info intel_valleyview_info = { I9XX_CURSOR_OFFSETS, I965_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; #define G75_FEATURES \ @@ -560,7 +578,7 @@ static const struct intel_device_info intel_broadwell_gt3_info = { static const struct intel_device_info intel_cherryview_info = { PLATFORM(INTEL_CHERRYVIEW), GEN(8), - .num_pipes = 3, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), .display.has_hotplug = 1, .is_lp = 1, .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), @@ -570,7 +588,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, - .ppgtt_type = INTEL_PPGTT_FULL, + .ppgtt_type = INTEL_PPGTT_ALIASING, .ppgtt_size = 32, .has_reset_engine = 1, .has_snoop = true, @@ -580,6 +598,7 @@ static const struct intel_device_info intel_cherryview_info = { CHV_CURSOR_OFFSETS, CHV_COLORS, GEN_DEFAULT_PAGE_SIZES, + GEN_DEFAULT_REGIONS, }; #define GEN9_DEFAULT_PAGE_SIZES \ @@ -631,7 +650,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .is_lp = 1, \ .display.has_hotplug = 1, \ .engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0) | BIT(VECS0), \ - .num_pipes = 3, \ + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C), \ .has_64bit_reloc = 1, \ .display.has_ddi = 1, \ .has_fpga_dbg = 1, \ @@ -654,7 +673,8 @@ static const struct intel_device_info intel_skylake_gt4_info = { HSW_PIPE_OFFSETS, \ IVB_CURSOR_OFFSETS, \ IVB_COLORS, \ - GEN9_DEFAULT_PAGE_SIZES + GEN9_DEFAULT_PAGE_SIZES, \ + 
GEN_DEFAULT_REGIONS static const struct intel_device_info intel_broxton_info = { GEN9_LP_FEATURES, @@ -787,16 +807,19 @@ static const struct intel_device_info intel_elkhartlake_info = { [TRANSCODER_DSI_0] = TRANSCODER_DSI0_OFFSET, \ [TRANSCODER_DSI_1] = TRANSCODER_DSI1_OFFSET, \ }, \ - .has_global_mocs = 1 + TGL_CURSOR_OFFSETS, \ + .has_global_mocs = 1, \ + .display.has_dsb = 1 static const struct intel_device_info intel_tigerlake_12_info = { GEN12_FEATURES, PLATFORM(INTEL_TIGERLAKE), - .num_pipes = 4, + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), .require_force_probe = 1, .display.has_modular_fia = 1, .engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), + .has_rps = false, /* XXX disabled for debugging */ }; #undef GEN diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e42b86827d6b..d2ac51fe4f04 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -196,7 +196,9 @@ #include <linux/uuid.h> #include "gem/i915_gem_context.h" -#include "gem/i915_gem_pm.h" +#include "gt/intel_engine_pm.h" +#include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_lrc_reg.h" #include "i915_drv.h" @@ -342,11 +344,14 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { * struct perf_open_properties - for validated properties given to open a stream * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags * @single_context: Whether a single or all gpu contexts should be monitored + * @hold_preemption: Whether the preemption is disabled for the filtered + * context * @ctx_handle: A gem ctx handle for use with @single_context * @metrics_set: An ID for an OA unit metric set advertised via sysfs * @oa_format: An OA unit HW report format * @oa_periodic: Whether to enable periodic OA unit sampling * @oa_period_exponent: The OA unit sampling period is derived from this + * @engine: The engine (typically rcs0) being monitored by the OA unit * * As read_properties_unlocked() enumerates and validates the properties given * to open a stream of metrics the configuration is built up in the structure @@ -356,6 +361,7 @@ struct perf_open_properties { u32 sample_flags; u64 single_context:1; + u64 hold_preemption:1; u64 ctx_handle; /* OA sampling state */ @@ -363,69 +369,66 @@ struct perf_open_properties { int oa_format; bool oa_periodic; int oa_period_exponent; + + struct intel_engine_cs *engine; +}; + +struct i915_oa_config_bo { + struct llist_node node; + + struct i915_oa_config *oa_config; + struct i915_vma *vma; }; static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { - if (!PTR_ERR(oa_config->flex_regs)) - kfree(oa_config->flex_regs); - if (!PTR_ERR(oa_config->b_counter_regs)) - kfree(oa_config->b_counter_regs); - if (!PTR_ERR(oa_config->mux_regs)) - kfree(oa_config->mux_regs); - kfree(oa_config); -} + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) -{ - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + kfree(oa_config->flex_regs); + kfree(oa_config->b_counter_regs); + kfree(oa_config->mux_regs); - free_oa_config(dev_priv, oa_config); + kfree_rcu(oa_config, rcu); } -static int get_oa_config(struct drm_i915_private *dev_priv, - int 
metrics_set, - struct i915_oa_config **out_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - int ret; - - if (metrics_set == 1) { - *out_config = &dev_priv->perf.test_config; - atomic_inc(&dev_priv->perf.test_config.ref_count); - return 0; - } - - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); - if (ret) - return ret; + struct i915_oa_config *oa_config; - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; else - atomic_inc(&(*out_config)->ref_count); + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - mutex_unlock(&dev_priv->perf.metrics_lock); + return oa_config; +} - return ret; +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; - return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; + return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; } static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - u32 oastatus1 = I915_READ(GEN7_OASTATUS1); + struct intel_uncore *uncore = stream->uncore; + u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } @@ -456,7 +459,6 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) */ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; int report_size = stream->oa_buffer.format_size; unsigned long flags; unsigned int aged_idx; @@ -479,7 +481,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aged_tail = stream->oa_buffer.tails[aged_idx].offset; aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); + hw_tail = stream->perf->ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, * not in report_size steps... @@ -655,7 +657,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -740,7 +742,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & OAREPORT_REASON_MASK); if (reason == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -755,7 +757,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. 
*/ - if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -789,7 +791,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * switches since it's not-uncommon for periodic samples to * identify a switch before any 'context switch' report. */ - if (!dev_priv->perf.exclusive_stream->ctx || + if (!stream->perf->exclusive_stream->ctx || stream->specific_ctx_id == ctx_id || stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || reason & OAREPORT_REASON_CTX_SWITCH) { @@ -798,7 +800,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * While filtering for a single context we avoid * leaking the IDs of other contexts. */ - if (dev_priv->perf.exclusive_stream->ctx && + if (stream->perf->exclusive_stream->ctx && stream->specific_ctx_id != ctx_id) { report32[2] = INVALID_CTX_ID; } @@ -830,7 +832,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, GEN8_OAHEADPTR, + head & GEN8_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -864,14 +867,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -896,14 +899,14 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); /* * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = I915_READ(GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -911,8 +914,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - I915_WRITE(GEN8_OASTATUS, - oastatus & ~GEN8_OASTATUS_REPORT_LOST); + intel_uncore_write(uncore, GEN8_OASTATUS, + oastatus & ~GEN8_OASTATUS_REPORT_LOST); } return gen8_append_oa_reports(stream, buf, count, offset); @@ -943,7 +946,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; int report_size = stream->oa_buffer.format_size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); @@ -1017,7 +1020,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, * copying it to userspace... 
*/ if (report32[0] == 0) { - if (__ratelimit(&dev_priv->perf.spurious_report_rs)) + if (__ratelimit(&stream->perf->spurious_report_rs)) DRM_NOTE("Skipping spurious, invalid OA report\n"); continue; } @@ -1043,9 +1046,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, */ head += gtt_offset; - I915_WRITE(GEN7_OASTATUS2, - ((head & GEN7_OASTATUS2_HEAD_MASK) | - GEN7_OASTATUS2_MEM_SELECT_GGTT)); + intel_uncore_write(uncore, GEN7_OASTATUS2, + (head & GEN7_OASTATUS2_HEAD_MASK) | + GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1075,21 +1078,21 @@ static int gen7_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 oastatus1; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); /* XXX: On Haswell we don't have a safe way to clear oastatus1 * bits while the OA unit is enabled (while the tail pointer * may be updated asynchronously) so we ignore status bits * that have already been reported to userspace. */ - oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1; + oastatus1 &= ~stream->perf->gen7_latched_oastatus1; /* We treat OABUFFER_OVERFLOW as a significant error: * @@ -1120,10 +1123,10 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", stream->period_exponent); - dev_priv->perf.ops.oa_disable(stream); - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_disable(stream); + stream->perf->ops.oa_enable(stream); - oastatus1 = I915_READ(GEN7_OASTATUS1); + oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1); } if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { @@ -1131,7 +1134,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - dev_priv->perf.gen7_latched_oastatus1 |= + stream->perf->gen7_latched_oastatus1 |= GEN7_OASTATUS1_REPORT_LOST; } @@ -1196,25 +1199,18 @@ static int i915_oa_read(struct i915_perf_stream *stream, size_t count, size_t *offset) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - return dev_priv->perf.ops.read(stream, buf, count, offset); + return stream->perf->ops.read(stream, buf, count, offset); } static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) { struct i915_gem_engines_iter it; - struct drm_i915_private *i915 = stream->dev_priv; struct i915_gem_context *ctx = stream->ctx; struct intel_context *ce; int err; - err = i915_mutex_lock_interruptible(&i915->drm); - if (err) - return ERR_PTR(err); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - if (ce->engine->class != RENDER_CLASS) + if (ce->engine != stream->engine) /* first match! 
*/ continue; /* @@ -1229,10 +1225,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) } i915_gem_context_unlock_engines(ctx); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return ERR_PTR(err); - return stream->pinned_ctx; } @@ -1248,14 +1240,13 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) */ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; struct intel_context *ce; ce = oa_pin_context(stream); if (IS_ERR(ce)) return PTR_ERR(ce); - switch (INTEL_GEN(i915)) { + switch (INTEL_GEN(ce->engine->i915)) { case 7: { /* * On Haswell we don't do any post processing of the reports @@ -1269,7 +1260,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 8: case 9: case 10: - if (USES_GUC_SUBMISSION(i915)) { + if (USES_GUC_SUBMISSION(ce->engine->i915)) { /* * When using GuC, the context descriptor we write in * i915 is read by GuC and rewritten before it's @@ -1292,28 +1283,24 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } else { stream->specific_ctx_id_mask = (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = - upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; - case 11: { + case 11: + case 12: { stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | - ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | - ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); - stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = stream->specific_ctx_id_mask; break; } default: - MISSING_CASE(INTEL_GEN(i915)); + MISSING_CASE(INTEL_GEN(ce->engine->i915)); } + ce->tag = stream->specific_ctx_id_mask; + DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id, stream->specific_ctx_id_mask); @@ -1330,69 +1317,76 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_context *ce; - stream->specific_ctx_id = INVALID_CTX_ID; - stream->specific_ctx_id_mask = 0; - ce = fetch_and_zero(&stream->pinned_ctx); if (ce) { - mutex_lock(&dev_priv->drm.struct_mutex); + ce->tag = 0; /* recomputed on next submission after parking */ intel_context_unpin(ce); - mutex_unlock(&dev_priv->drm.struct_mutex); } + + stream->specific_ctx_id = INVALID_CTX_ID; + stream->specific_ctx_id_mask = 0; } static void free_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; - - mutex_lock(&i915->drm.struct_mutex); - i915_vma_unpin_and_release(&stream->oa_buffer.vma, I915_VMA_RELEASE_MAP); - mutex_unlock(&i915->drm.struct_mutex); - stream->oa_buffer.vaddr = NULL; } +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) + free_oa_config_bo(oa_bo); +} + +static void +free_noa_wait(struct i915_perf_stream *stream) +{ + i915_vma_unpin_and_release(&stream->noa_wait, 0); +} + static void i915_oa_stream_destroy(struct 
i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - BUG_ON(stream != dev_priv->perf.exclusive_stream); + BUG_ON(stream != perf->exclusive_stream); /* * Unset exclusive_stream first, it will be checked while disabling * the metric set on gen8+. */ - mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); free_oa_buffer(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); + free_noa_wait(stream); - if (dev_priv->perf.spurious_report_rs.missed) { + if (perf->spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", - dev_priv->perf.spurious_report_rs.missed); + perf->spurious_report_rs.missed); } } static void gen7_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; @@ -1401,13 +1395,14 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, - gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ + intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN7_OABUFFER, gtt_offset); + intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); - I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ + intel_uncore_write(uncore, GEN7_OASTATUS1, /* tail */ + gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1419,7 +1414,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) * already seen since they can't be cleared while periodic * sampling is enabled. */ - dev_priv->perf.gen7_latched_oastatus1 = 0; + stream->perf->gen7_latched_oastatus1 = 0; /* NB: although the OA buffer will initially be allocated * zeroed via shmfs (and so this memset is redundant when @@ -1434,25 +1429,22 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. 
- */ stream->pollin = false; } static void gen8_init_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); unsigned long flags; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - I915_WRITE(GEN8_OASTATUS, 0); - I915_WRITE(GEN8_OAHEADPTR, gtt_offset); + intel_uncore_write(uncore, GEN8_OASTATUS, 0); + intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); stream->oa_buffer.head = gtt_offset; - I915_WRITE(GEN8_OABUFFER_UDW, 0); + intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); /* * PRM says: @@ -1462,9 +1454,9 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) * to enable proper functionality of the overflow * bit." */ - I915_WRITE(GEN8_OABUFFER, gtt_offset | + intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset | OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); - I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); + intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; @@ -1493,35 +1485,25 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) */ memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); - /* - * Maybe make ->pollin per-stream state if we support multiple - * concurrent streams in the future. - */ stream->pollin = false; } static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; - struct drm_i915_private *dev_priv = stream->dev_priv; struct i915_vma *vma; int ret; if (WARN_ON(stream->oa_buffer.vma)) return -ENODEV; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); - bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); + bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); - ret = PTR_ERR(bo); - goto unlock; + return PTR_ERR(bo); } i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); @@ -1541,11 +1523,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) goto err_unpin; } - DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n", - i915_ggtt_offset(stream->oa_buffer.vma), - stream->oa_buffer.vaddr); - - goto unlock; + return 0; err_unpin: __i915_vma_unpin(vma); @@ -1556,55 +1534,384 @@ err_unref: stream->oa_buffer.vaddr = NULL; stream->oa_buffer.vma = NULL; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, - const struct i915_oa_reg *regs, - u32 n_regs) +static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, + bool save, i915_reg_t reg, u32 offset, + u32 dword_count) +{ + u32 cmd; + u32 d; + + cmd = save ? 
MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM; + if (INTEL_GEN(stream->perf->i915) >= 8) + cmd++; + + for (d = 0; d < dword_count; d++) { + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg) + 4 * d; + *cs++ = intel_gt_scratch_offset(stream->engine->gt, + offset) + 4 * d; + *cs++ = 0; + } + + return cs; +} + +static int alloc_noa_wait(struct i915_perf_stream *stream) +{ + struct drm_i915_private *i915 = stream->perf->i915; + struct drm_i915_gem_object *bo; + struct i915_vma *vma; + const u64 delay_ticks = 0xffffffffffffffff - + DIV64_U64_ROUND_UP( + atomic64_read(&stream->perf->noa_programming_delay) * + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, + 1000000ull); + const u32 base = stream->engine->mmio_base; +#define CS_GPR(x) GEN8_RING_CS_GPR(base, x) + u32 *batch, *ts0, *cs, *jump; + int ret, i; + enum { + START_TS, + NOW_TS, + DELTA_TS, + JUMP_PREDICATE, + DELTA_TARGET, + N_CS_GPR + }; + + bo = i915_gem_object_create_internal(i915, 4096); + if (IS_ERR(bo)) { + DRM_ERROR("Failed to allocate NOA wait batchbuffer\n"); + return PTR_ERR(bo); + } + + /* + * We pin in GGTT because we jump into this buffer now because + * multiple OA config BOs will have a jump to this address and it + * needs to be fixed during the lifetime of the i915/perf stream. + */ + vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); + if (IS_ERR(batch)) { + ret = PTR_ERR(batch); + goto err_unpin; + } + + /* Save registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, true /* save */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, true /* save */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* First timestamp snapshot location. */ + ts0 = cs; + + /* + * Initial snapshot of the timestamp register to implement the wait. + * We work with 32b values, so clear out the top 32b bits of the + * register because the ALU works 64bits. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)); + + /* + * This is the location we're going to jump back into until the + * required amount of time has passed. + */ + jump = cs; + + /* + * Take another snapshot of the timestamp register. Take care to clear + * up the top 32bits of CS_GPR(1) as we're using it for other + * operations below. + */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4; + *cs++ = 0; + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)); + *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)); + + /* + * Do a diff between the 2 timestamps and store the result back into + * CS_GPR(1). + */ + *cs++ = MI_MATH(5); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS)); + *cs++ = MI_MATH_SUB; + *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU); + *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the + * timestamp have rolled over the 32bits) into the predicate register + * to be used for the predicated jump. 
+ */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Restart from the beginning if we had timestamps roll over. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4; + *cs++ = 0; + + /* + * Now add the diff between to previous timestamps and add it to : + * (((1 * << 64) - 1) - delay_ns) + * + * When the Carry Flag contains 1 this means the elapsed time is + * longer than the expected delay, and we can exit the wait loop. + */ + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)); + *cs++ = lower_32_bits(delay_ticks); + *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4; + *cs++ = upper_32_bits(delay_ticks); + + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF); + + /* + * Transfer the result into the predicate register to be used for the + * predicated jump. + */ + *cs++ = MI_LOAD_REGISTER_REG | (3 - 2); + *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)); + *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1); + + /* Predicate the jump. */ + *cs++ = (INTEL_GEN(i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8) | + MI_BATCH_PREDICATE; + *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4; + *cs++ = 0; + + /* Restore registers. */ + for (i = 0; i < N_CS_GPR; i++) + cs = save_restore_register( + stream, cs, false /* restore */, CS_GPR(i), + INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2); + cs = save_restore_register( + stream, cs, false /* restore */, MI_PREDICATE_RESULT_1, + INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1); + + /* And return to the ring. 
*/ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch)); + + i915_gem_object_flush_map(bo); + i915_gem_object_unpin_map(bo); + + stream->noa_wait = vma; + return 0; + +err_unpin: + i915_vma_unpin_and_release(&vma, 0); +err_unref: + i915_gem_object_put(bo); + return ret; +} + +static u32 *write_cs_mi_lri(u32 *cs, + const struct i915_oa_reg *reg_data, + u32 n_regs) { u32 i; for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min_t(u32, + n_regs - i, + MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; +} + +static int num_lri_dwords(int num_regs) +{ + int count = 0; + + if (num_regs > 0) { + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); + count += num_regs * 2; + } + + return count; +} + +static struct i915_oa_config_bo * +alloc_oa_config_buffer(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config) +{ + struct drm_i915_gem_object *obj; + struct i915_oa_config_bo *oa_bo; + size_t config_length = 0; + u32 *cs; + int err; + + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); + if (!oa_bo) + return ERR_PTR(-ENOMEM); + + config_length += num_lri_dwords(oa_config->mux_regs_len); + config_length += num_lri_dwords(oa_config->b_counter_regs_len); + config_length += num_lri_dwords(oa_config->flex_regs_len); + config_length++; /* MI_BATCH_BUFFER_END */ + config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); + + obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_free; + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_oa_bo; + } - I915_WRITE(reg->addr, reg->value); + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->b_counter_regs, + oa_config->b_counter_regs_len); + cs = write_cs_mi_lri(cs, + oa_config->flex_regs, + oa_config->flex_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + oa_bo->vma = i915_vma_instance(obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->vma)) { + err = PTR_ERR(oa_bo->vma); + goto err_oa_bo; } + + oa_bo->oa_config = i915_oa_config_get(oa_config); + llist_add(&oa_bo->node, &stream->oa_config_bos); + + return oa_bo; + +err_oa_bo: + i915_gem_object_put(obj); +err_free: + kfree(oa_bo); + return ERR_PTR(err); } -static void delay_after_mux(void) +static struct i915_vma * +get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { + struct i915_oa_config_bo *oa_bo; + /* - * It apparently takes a fairly long time for a new MUX - * configuration to be be applied after these register writes. - * This delay duration was derived empirically based on the - * render_basic config but hopefully it covers the maximum - * configuration latency. - * - * As a fallback, the checks in _append_oa_reports() to skip - * invalid OA reports do also seem to work to discard reports - * generated before this config has completed - albeit not - * silently. 
- * - * Unfortunately this is essentially a magic number, since we - * don't currently know of a reliable mechanism for predicting - * how long the MUX config will take to apply and besides - * seeing invalid reports we don't know of a reliable way to - * explicitly check that the MUX config has landed. - * - * It's even possible we've miss characterized the underlying - * problem - it just seems like the simplest explanation why - * a delay at this location would mitigate any invalid reports. + * Look for the buffer in the already allocated BOs attached + * to the stream. */ - usleep_range(15000, 20000); + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { + if (oa_bo->oa_config == oa_config && + memcmp(oa_bo->oa_config->uuid, + oa_config->uuid, + sizeof(oa_config->uuid)) == 0) + goto out; + } + + oa_bo = alloc_oa_config_buffer(stream, oa_config); + if (IS_ERR(oa_bo)) + return ERR_CAST(oa_bo); + +out: + return i915_vma_get(oa_bo->vma); +} + +static int emit_oa_config(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config, + struct intel_context *ce) +{ + struct i915_request *rq; + struct i915_vma *vma; + int err; + + vma = get_oa_vma(stream, oa_config); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err_vma_put; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma_unpin; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, 0); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + if (err) + goto err_add_request; + + err = rq->engine->emit_bb_start(rq, + vma->node.start, 0, + I915_DISPATCH_SECURE); +err_add_request: + i915_request_add(rq); +err_vma_unpin: + i915_vma_unpin(vma); +err_vma_put: + i915_vma_put(vma); + return err; +} + +static struct intel_context *oa_context(struct i915_perf_stream *stream) +{ + return stream->pinned_ctx ?: stream->engine->kernel_context; } static int hsw_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; + struct intel_uncore *uncore = stream->uncore; /* * PRM: @@ -1616,31 +1923,24 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream) * count the events from non-render domain. Unit level clock * gating for RCS should also be disabled. 
*/ - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN7_DOP_CLOCK_GATE_ENABLE)); - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE)); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + GEN7_DOP_CLOCK_GATE_ENABLE, 0); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, stream->oa_config, oa_context(stream)); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; - I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & - ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | - GEN7_DOP_CLOCK_GATE_ENABLE)); + intel_uncore_rmw(uncore, GEN6_UCGCTL1, + GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0); + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, + 0, GEN7_DOP_CLOCK_GATE_ENABLE); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, @@ -1672,14 +1972,11 @@ static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, * in the case that the OA unit has been disabled. */ static void -gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, - struct intel_context *ce, - u32 *reg_state, - const struct i915_oa_config *oa_config) -{ - struct drm_i915_private *i915 = ce->engine->i915; - u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; +gen8_update_reg_state_unlocked(const struct intel_context *ce, + const struct i915_perf_stream *stream) +{ + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; + u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; /* The MMIO offsets for Flex EU registers aren't contiguous */ i915_reg_t flex_regs[] = { EU_PERF_CNTL0, @@ -1690,21 +1987,20 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, EU_PERF_CNTL5, EU_PERF_CNTL6, }; + u32 *reg_state = ce->lrc_reg_state; int i; - CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, + reg_state[ctx_oactxctrl + 1] = (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME); + GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { - CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i], - oa_config_flex_reg(oa_config, flex_regs[i])); - } + for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + reg_state[ctx_flexeu0 + i * 2 + 1] = + oa_config_flex_reg(stream->oa_config, flex_regs[i]); - CTX_REG(reg_state, - CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - intel_sseu_make_rpcs(i915, &ce->sseu)); + reg_state[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(ce->engine->i915, &ce->sseu); } struct flex { @@ -1728,7 +2024,7 @@ gen8_store_flex(struct i915_request *rq, offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; do { *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = offset + (flex->offset + 1) * sizeof(u32); + *cs++ = offset + flex->offset * sizeof(u32); *cs++ = 0; *cs++ = flex->value; } while (flex++, --count); @@ -1859,10 +2155,10 @@ static int gen8_configure_context(struct i915_gem_context *ctx, static int gen8_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config) { - struct drm_i915_private *i915 = stream->dev_priv; + struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ - const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; -#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N)) + const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) struct flex regs[] = { { GEN8_R_PWR_CLK_STATE, @@ -1870,7 +2166,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }, { GEN8_OACTXCONTROL, - i915->perf.ctx_oactxctrl_offset, + stream->perf->ctx_oactxctrl_offset + 1, ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME) @@ -1885,13 +2181,13 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }; #undef ctx_flexeuN struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - int i; + struct i915_gem_context *ctx, *cn; + int i, err; for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); - lockdep_assert_held(&i915->drm.struct_mutex); + lockdep_assert_held(&stream->perf->lock); /* * The OA register config is setup through the context image. This image @@ -1909,16 +2205,27 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, * context. Contexts idle at the time of reconfiguration are not * trapped behind the barrier. */ - list_for_each_entry(ctx, &i915->contexts.list, link) { - int err; - + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { if (ctx == i915->kernel_context) continue; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); - if (err) + if (err) { + i915_gem_context_put(ctx); return err; + } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); /* * After updating all other contexts, we need to modify ourselves. 
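The interesting part of the reworked gen8_configure_all_contexts() above is how it walks a shared list without holding the contexts spinlock across the (potentially sleeping) per-context reconfiguration: pin the element with kref_get_unless_zero() under the lock, drop the lock for the slow work, retake it and re-read the next pointer from the element still held. A loose single-threaded userspace sketch of that shape follows; the struct and function names are made up for illustration and pthread_mutex stands in for the spinlock.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for i915_gem_context: a refcounted list node. */
struct ctx {
	struct ctx *next;
	int refcount;		/* protected by list_lock in this sketch */
	int id;
	int configured;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ctx *ctx_list;

/* Stand-in for gen8_configure_context(): may sleep, so it must not
 * be called with list_lock held. */
static void configure_ctx(struct ctx *c)
{
	c->configured = 1;
}

/* Walk the shared list the way the patch does: take a reference under
 * the lock, drop the lock for the slow work, then retake it and
 * continue from the element we still hold. */
static void configure_all(void)
{
	struct ctx *c, *next;

	pthread_mutex_lock(&list_lock);
	for (c = ctx_list; c; c = next) {
		next = c->next;
		if (c->refcount == 0)	/* kref_get_unless_zero() failed */
			continue;
		c->refcount++;		/* element cannot disappear now */

		pthread_mutex_unlock(&list_lock);
		configure_ctx(c);	/* slow work without the lock */
		pthread_mutex_lock(&list_lock);

		next = c->next;		/* re-read: list may have changed */
		c->refcount--;		/* drop our temporary reference */
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct ctx *c = calloc(1, sizeof(*c));

		c->id = i;
		c->refcount = 1;
		c->next = ctx_list;
		ctx_list = c;
	}

	configure_all();

	for (struct ctx *c = ctx_list; c; c = c->next)
		printf("ctx %d configured=%d\n", c->id, c->configured);
	return 0;
}

Re-reading the next pointer after retaking the lock is what list_safe_reset_next() does in the kernel version; without it a context freed while the lock was dropped could leave the walker following a stale pointer.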
@@ -1927,7 +2234,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, */ for_each_uabi_engine(engine, i915) { struct intel_context *ce = engine->kernel_context; - int err; if (engine->class != RENDER_CLASS) continue; @@ -1944,8 +2250,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, static int gen8_enable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - const struct i915_oa_config *oa_config = stream->oa_config; + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; int ret; /* @@ -1971,10 +2277,10 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN_RANGE(dev_priv, 9, 11)) { - I915_WRITE(GEN8_OA_DEBUG, - _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); + if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) { + intel_uncore_write(uncore, GEN8_OA_DEBUG, + _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); } /* @@ -1986,41 +2292,33 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ret; - config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); - delay_after_mux(); - - config_oa_regs(dev_priv, oa_config->b_counter_regs, - oa_config->b_counter_regs_len); - - return 0; + return emit_oa_config(stream, oa_config, oa_context(stream)); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); - I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & - ~GT_NOA_ENABLE)); + intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } static void gen10_disable_metric_set(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ gen8_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ - I915_WRITE(RPM_CONFIG1, - I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); + intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); } static void gen7_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; struct i915_gem_context *ctx = stream->ctx; u32 ctx_id = stream->specific_ctx_id; bool periodic = stream->periodic; @@ -2038,19 +2336,19 @@ static void gen7_oa_enable(struct i915_perf_stream *stream) */ gen7_init_oa_buffer(stream); - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); + intel_uncore_write(uncore, GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? 
GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_uncore *uncore = stream->uncore; u32 report_format = stream->oa_buffer.format; /* @@ -2069,9 +2367,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) * filtering and instead filter on the cpu based on the context-id * field of reports */ - I915_WRITE(GEN8_OACONTROL, (report_format << - GEN8_OA_REPORT_FORMAT_SHIFT) | - GEN8_OA_COUNTER_ENABLE); + intel_uncore_write(uncore, GEN8_OACONTROL, + (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) | + GEN8_OA_COUNTER_ENABLE); } /** @@ -2085,9 +2383,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) */ static void i915_oa_stream_enable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_enable(stream); + stream->perf->ops.oa_enable(stream); if (stream->periodic) hrtimer_start(&stream->poll_check_timer, @@ -2097,7 +2393,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN7_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2108,7 +2404,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream) static void gen8_oa_disable(struct i915_perf_stream *stream) { - struct intel_uncore *uncore = &stream->dev_priv->uncore; + struct intel_uncore *uncore = stream->uncore; intel_uncore_write(uncore, GEN8_OACONTROL, 0); if (intel_wait_for_register(uncore, @@ -2127,9 +2423,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) */ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; - - dev_priv->perf.ops.oa_disable(stream); + stream->perf->ops.oa_disable(stream); if (stream->periodic) hrtimer_cancel(&stream->poll_check_timer); @@ -2166,15 +2460,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_perf_open_param *param, struct perf_open_properties *props) { - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; int format_size; int ret; - /* If the sysfs metrics/ directory wasn't registered for some + if (!props->engine) { + DRM_DEBUG("OA engine not specified\n"); + return -EINVAL; + } + + /* + * If the sysfs metrics/ directory wasn't registered for some * reason then don't let userspace try their luck with config * IDs */ - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -2184,16 +2484,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!dev_priv->perf.ops.enable_metric_set) { + if (!perf->ops.enable_metric_set) { DRM_DEBUG("OA unit not supported\n"); return -ENODEV; } - /* To avoid the complexity of having to accurately filter + /* + * To avoid the complexity of having to accurately filter * counter reports and marshal to the appropriate client * we currently only allow exclusive access */ - if (dev_priv->perf.exclusive_stream) { + if (perf->exclusive_stream) { DRM_DEBUG("OA unit already in use\n"); return -EBUSY; } @@ -2203,9 +2504,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } + stream->engine = props->engine; + 
stream->uncore = stream->engine->gt->uncore; + stream->sample_size = sizeof(struct drm_i915_perf_record_header); - format_size = dev_priv->perf.oa_formats[props->oa_format].size; + format_size = perf->oa_formats[props->oa_format].size; stream->sample_flags |= SAMPLE_OA_REPORT; stream->sample_size += format_size; @@ -2214,8 +2518,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (WARN_ON(stream->oa_buffer.format_size == 0)) return -EINVAL; + stream->hold_preemption = props->hold_preemption; + stream->oa_buffer.format = - dev_priv->perf.oa_formats[props->oa_format].format; + perf->oa_formats[props->oa_format].format; stream->periodic = props->oa_periodic; if (stream->periodic) @@ -2229,9 +2535,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, } } - ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); + ret = alloc_noa_wait(stream); if (ret) { + DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); + goto err_noa_wait_alloc; + } + + stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set); + if (!stream->oa_config) { DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); + ret = -EINVAL; goto err_config; } @@ -2247,27 +2560,24 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * In our case we are expecting that taking pm + FORCEWAKE * references will effectively disable RC6. */ - stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engine_pm_get(stream->engine); + intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); ret = alloc_oa_buffer(stream); if (ret) goto err_oa_buf_alloc; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - goto err_lock; - stream->ops = &i915_oa_stream_ops; - dev_priv->perf.exclusive_stream = stream; + perf->exclusive_stream = stream; - ret = dev_priv->perf.ops.enable_metric_set(stream); + ret = perf->ops.enable_metric_set(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; } - mutex_unlock(&dev_priv->drm.struct_mutex); + DRM_DEBUG("opening stream oa config uuid=%s\n", + stream->oa_config->uuid); hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -2278,38 +2588,41 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return 0; err_enable: - dev_priv->perf.exclusive_stream = NULL; - dev_priv->perf.ops.disable_metric_set(stream); - mutex_unlock(&dev_priv->drm.struct_mutex); + perf->exclusive_stream = NULL; + perf->ops.disable_metric_set(stream); -err_lock: free_oa_buffer(stream); err_oa_buf_alloc: - put_oa_config(dev_priv, stream->oa_config); + free_oa_configs(stream); - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); + intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); + intel_engine_pm_put(stream->engine); err_config: + free_noa_wait(stream); + +err_noa_wait_alloc: if (stream->ctx) oa_put_render_ctx_id(stream); return ret; } -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *regs) +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine) { struct i915_perf_stream *stream; + /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + lockdep_assert_held(&ce->pin_mutex); + if (engine->class != RENDER_CLASS) return; stream = engine->i915->perf.exclusive_stream; if (stream) - gen8_update_reg_state_unlocked(stream, ce, 
regs, stream->oa_config); + gen8_update_reg_state_unlocked(ce, stream); } /** @@ -2379,7 +2692,7 @@ static ssize_t i915_perf_read(struct file *file, loff_t *ppos) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; ssize_t ret; /* To ensure it's handled consistently we simply treat all reads of a @@ -2402,15 +2715,15 @@ static ssize_t i915_perf_read(struct file *file, if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } while (ret == -EAGAIN); } else { - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_read_locked(stream, file, buf, count, ppos); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /* We allow the poll checking to sometimes report false positive EPOLLIN @@ -2448,7 +2761,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) /** * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream - * @dev_priv: i915 device instance * @stream: An i915 perf stream * @file: An i915 perf stream file * @wait: poll() state table @@ -2457,15 +2769,14 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that * will be woken for new stream data. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: any poll events that are ready without sleeping */ -static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, - struct i915_perf_stream *stream, - struct file *file, - poll_table *wait) +static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait) { __poll_t events = 0; @@ -2499,12 +2810,12 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, static __poll_t i915_perf_poll(struct file *file, poll_table *wait) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; __poll_t ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_poll_locked(dev_priv, stream, file, wait); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_poll_locked(stream, file, wait); + mutex_unlock(&perf->lock); return ret; } @@ -2529,6 +2840,9 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) if (stream->ops->enable) stream->ops->enable(stream); + + if (stream->hold_preemption) + i915_gem_context_set_nopreempt(stream->ctx); } /** @@ -2553,17 +2867,54 @@ static void i915_perf_disable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->disable() to refer to this */ stream->enabled = false; + if (stream->hold_preemption) + i915_gem_context_clear_nopreempt(stream->ctx); + if (stream->ops->disable) stream->ops->disable(stream); } +static long i915_perf_config_locked(struct i915_perf_stream *stream, + unsigned long metrics_set) +{ + struct i915_oa_config *config; + long ret = stream->oa_config->id; + + config = i915_perf_get_oa_config(stream->perf, metrics_set); + if (!config) + return -EINVAL; + + if (config != stream->oa_config) { + int err; + + /* + * If OA is bound to a specific context, emit the + * 
reconfiguration inline from that context. The update + * will then be ordered with respect to submission on that + * context. + * + * When set globally, we use a low priority kernel context, + * so it will effectively take effect when idle. + */ + err = emit_oa_config(stream, config, oa_context(stream)); + if (err == 0) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + i915_oa_config_put(config); + + return ret; +} + /** * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs * @stream: An i915 perf stream * @cmd: the ioctl request * @arg: the ioctl data * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. * * Returns: zero on success or a negative error code. Returns -EINVAL for @@ -2580,6 +2931,8 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, case I915_PERF_IOCTL_DISABLE: i915_perf_disable_locked(stream); return 0; + case I915_PERF_IOCTL_CONFIG: + return i915_perf_config_locked(stream, arg); } return -EINVAL; @@ -2601,12 +2954,12 @@ static long i915_perf_ioctl(struct file *file, unsigned long arg) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; long ret; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); ret = i915_perf_ioctl_locked(stream, cmd, arg); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); return ret; } @@ -2618,7 +2971,7 @@ static long i915_perf_ioctl(struct file *file, * Frees all resources associated with the given i915 perf @stream, disabling * any associated data capture in the process. * - * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize + * Note: The &perf->lock mutex has been taken to serialize * with any non-file-operation driver hooks. */ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) @@ -2629,8 +2982,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) if (stream->ops->destroy) stream->ops->destroy(stream); - list_del(&stream->link); - if (stream->ctx) i915_gem_context_put(stream->ctx); @@ -2651,14 +3002,14 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) static int i915_perf_release(struct inode *inode, struct file *file) { struct i915_perf_stream *stream = file->private_data; - struct drm_i915_private *dev_priv = stream->dev_priv; + struct i915_perf *perf = stream->perf; - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); i915_perf_destroy_locked(stream); - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); /* Release the reference the perf stream kept on the driver. */ - drm_dev_put(&dev_priv->drm); + drm_dev_put(&perf->i915->drm); return 0; } @@ -2680,7 +3031,7 @@ static const struct file_operations fops = { /** * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @param: The open parameters passed to 'DRM_I915_PERF_OPEN` * @props: individually validated u64 property value pairs * @file: drm file @@ -2688,7 +3039,7 @@ static const struct file_operations fops = { * See i915_perf_ioctl_open() for interface details. 
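The reference handling in i915_perf_config_locked() above is subtle: exactly one reference must be dropped on every path, whether the swap succeeded, failed, or was a no-op. A minimal userspace sketch of the same ownership dance is below; it assumes the caller serialises against concurrent swaps (the kernel holds perf->lock and additionally uses xchg()), and all names are illustrative stand-ins.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical refcounted OA configuration. */
struct oa_config {
	int id;
	int refcount;
};

struct stream {
	struct oa_config *oa_config;	/* stream holds one reference */
};

static struct oa_config *config_get(struct oa_config *cfg)
{
	cfg->refcount++;
	return cfg;
}

static void config_put(struct oa_config *cfg)
{
	if (--cfg->refcount == 0)
		free(cfg);
}

/* Stand-in for emit_oa_config(): pretend programming always succeeds. */
static int program_config(struct oa_config *cfg)
{
	(void)cfg;
	return 0;
}

/* Mirror of i915_perf_config_locked(): on success the stream adopts the
 * new configuration and the previous id is returned; on failure the
 * stream keeps its current configuration and the error is returned.
 * Exactly one reference is dropped on every path. */
static long stream_set_config(struct stream *s, struct oa_config *new_cfg)
{
	struct oa_config *cfg = config_get(new_cfg);
	long ret = s->oa_config->id;

	if (cfg != s->oa_config) {
		int err = program_config(cfg);

		if (err == 0) {
			struct oa_config *old = s->oa_config;

			s->oa_config = cfg;	/* stream now owns our ref */
			cfg = old;		/* drop the stream's old ref */
		} else {
			ret = err;		/* keep old config, report error */
		}
	}

	config_put(cfg);
	return ret;
}

int main(void)
{
	struct oa_config *a = calloc(1, sizeof(*a));
	struct oa_config *b = calloc(1, sizeof(*b));
	struct stream s;

	a->id = 1; a->refcount = 1;
	b->id = 2; b->refcount = 1;
	s.oa_config = config_get(a);

	printf("previous config id: %ld\n", stream_set_config(&s, b));
	printf("active config id:   %d\n", s.oa_config->id);

	config_put(a);
	config_put(b);
	config_put(s.oa_config);
	return 0;
}

Returning the previously active id rather than zero is what lets userspace restore the old configuration later with another I915_PERF_IOCTL_CONFIG call.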
* * Implements further stream config validation and stream initialization on - * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex + * behalf of i915_perf_open_ioctl() with the &perf->lock mutex * taken to serialize with any non-file-operation driver hooks. * * Note: at this point the @props have only been validated in isolation and @@ -2703,7 +3054,7 @@ static const struct file_operations fops = { * Returns: zero on success or a negative error code. */ static int -i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, +i915_perf_open_ioctl_locked(struct i915_perf *perf, struct drm_i915_perf_open_param *param, struct perf_open_properties *props, struct drm_file *file) @@ -2728,6 +3079,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, } } + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -2742,7 +3102,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. */ - if (IS_HASWELL(dev_priv) && specific_ctx) + if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option @@ -2752,7 +3112,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (privileged_op && i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { - DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); + DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -2763,7 +3123,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_ctx; } - stream->dev_priv = dev_priv; + stream->perf = perf; stream->ctx = specific_ctx; ret = i915_oa_stream_init(stream, param, props); @@ -2779,8 +3139,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, goto err_flags; } - list_add(&stream->link, &dev_priv->perf.streams); - if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) f_flags |= O_CLOEXEC; if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) @@ -2789,7 +3147,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); if (stream_fd < 0) { ret = stream_fd; - goto err_open; + goto err_flags; } if (!(param->flags & I915_PERF_FLAG_DISABLED)) @@ -2798,12 +3156,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, /* Take a reference on the driver that will be kept with stream_fd * until its release. 
*/ - drm_dev_get(&dev_priv->drm); + drm_dev_get(&perf->i915->drm); return stream_fd; -err_open: - list_del(&stream->link); err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); @@ -2816,15 +3172,15 @@ err: return ret; } -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); } /** * read_properties_unlocked - validate + copy userspace stream open properties - * @dev_priv: i915 device instance + * @perf: i915 perf instance * @uprops: The array of u64 key value pairs given by userspace * @n_props: The number of key value pairs expected in @uprops * @props: The stream configuration built up while validating properties @@ -2837,7 +3193,7 @@ static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) * we shouldn't validate or assume anything about ordering here. This doesn't * rule out defining new properties with ordering requirements in the future. */ -static int read_properties_unlocked(struct drm_i915_private *dev_priv, +static int read_properties_unlocked(struct i915_perf *perf, u64 __user *uprops, u32 n_props, struct perf_open_properties *props) @@ -2852,6 +3208,15 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, return -EINVAL; } + /* At the moment we only support using i915-perf on the RCS. */ + props->engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0); + if (!props->engine) { + DRM_DEBUG("No RENDER-capable engines\n"); + return -EINVAL; + } + /* Considering that ID = 0 is reserved and assuming that we don't * (currently) expect any configurations to ever specify duplicate * values for a particular property ID then the last _PROP_MAX value is @@ -2903,7 +3268,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, value); return -EINVAL; } - if (!dev_priv->perf.oa_formats[value].size) { + if (!perf->oa_formats[value].size) { DRM_DEBUG("Unsupported OA report format %llu\n", value); return -EINVAL; @@ -2924,7 +3289,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, */ BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = oa_exponent_to_ns(dev_priv, value); + oa_period = oa_exponent_to_ns(perf, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -2949,6 +3314,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_HOLD_PREEMPTION: + props->hold_preemption = !!value; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -2978,7 +3346,7 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, * mutex to avoid an awkward lockdep with mmap_sem. * * Most of the implementation details are handled by - * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock + * i915_perf_open_ioctl_locked() after taking the &perf->lock * mutex for serializing with any non-file-operation driver hooks. 
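The period computation in oa_exponent_to_ns() is easy to sanity check in isolation: the OA unit samples every 2^(exponent + 1) timestamp ticks, and the timestamp frequency is reported in kHz. The sketch below repeats the same integer arithmetic in plain C; the 12 MHz frequency is only an example value, the real one comes from RUNTIME_INFO(i915)->cs_timestamp_frequency_khz.

#include <stdint.h>
#include <stdio.h>

/*
 * Same arithmetic as oa_exponent_to_ns():
 *
 *   period_ns = 10^9 * 2^(exponent + 1) / (freq_khz * 10^3)
 */
static uint64_t oa_exponent_to_ns(uint64_t freq_khz, int exponent)
{
	return 1000000000ULL * (2ULL << exponent) / (1000ULL * freq_khz);
}

int main(void)
{
	/* Example command streamer timestamp frequency, in kHz. */
	const uint64_t freq_khz = 12000;

	for (int exponent = 0; exponent <= 20; exponent += 5)
		printf("exponent %2d -> period %llu ns\n", exponent,
		       (unsigned long long)oa_exponent_to_ns(freq_khz, exponent));
	return 0;
}

This is also why the code checks the result against UINT32_MAX and against the 100 us paranoid threshold: the period grows exponentially with the property value supplied by userspace.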
* * Return: A newly opened i915 Perf stream file descriptor or negative @@ -2987,13 +3355,13 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_open_param *param = data; struct perf_open_properties props; u32 known_open_flags; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3006,124 +3374,128 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = read_properties_unlocked(dev_priv, + ret = read_properties_unlocked(perf, u64_to_user_ptr(param->properties_ptr), param->num_properties, &props); if (ret) return ret; - mutex_lock(&dev_priv->perf.lock); - ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); - mutex_unlock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); + ret = i915_perf_open_ioctl_locked(perf, param, &props, file); + mutex_unlock(&perf->lock); return ret; } /** * i915_perf_register - exposes i915-perf to userspace - * @dev_priv: i915 device instance + * @i915: i915 device instance * * In particular OA metric sets are advertised under a sysfs metrics/ * directory allowing userspace to enumerate valid IDs that can be * used to open an i915-perf stream. */ -void i915_perf_register(struct drm_i915_private *dev_priv) +void i915_perf_register(struct drm_i915_private *i915) { + struct i915_perf *perf = &i915->perf; int ret; - if (!dev_priv->perf.initialized) + if (!perf->i915) return; /* To be sure we're synchronized with an attempted * i915_perf_open_ioctl(); considering that we register after * being exposed to userspace. 
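For context on how the properties validated by read_properties_unlocked() are supplied, here is a hedged userspace sketch of opening a system-wide OA stream. It assumes a uapi header that already carries the i915 perf definitions; the metrics set id (1 is the kernel test config), OA format, exponent and device node are placeholders rather than recommendations, and opening without a context handle requires CAP_SYS_ADMIN unless perf_stream_paranoid is cleared. The new DRM_I915_PERF_PROP_HOLD_PREEMPTION property added by this series could be appended to the same array, but per the patch only together with DRM_I915_PERF_PROP_CTX_HANDLE.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
	/* Key/value pairs exactly as read_properties_unlocked() expects
	 * them: a flat array of u64 property ids and values. */
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,	/* placeholder id */
		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
		DRM_I915_PERF_PROP_OA_EXPONENT, 16,	/* placeholder period */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
		.properties_ptr = (uintptr_t)properties,
	};
	int drm_fd, stream_fd;

	drm_fd = open("/dev/dri/renderD128", O_RDWR);	/* placeholder node */
	if (drm_fd < 0) {
		perror("open");
		return 1;
	}

	stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
	if (stream_fd < 0)
		perror("DRM_IOCTL_I915_PERF_OPEN");
	else
		close(stream_fd);

	close(drm_fd);
	return 0;
}

Note that num_properties counts key/value pairs, not u64 words, which matches the loop in read_properties_unlocked().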
*/ - mutex_lock(&dev_priv->perf.lock); + mutex_lock(&perf->lock); - dev_priv->perf.metrics_kobj = + perf->metrics_kobj = kobject_create_and_add("metrics", - &dev_priv->drm.primary->kdev->kobj); - if (!dev_priv->perf.metrics_kobj) + &i915->drm.primary->kdev->kobj); + if (!perf->metrics_kobj) goto exit; - sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); - - if (INTEL_GEN(dev_priv) >= 11) { - i915_perf_load_test_config_icl(dev_priv); - } else if (IS_CANNONLAKE(dev_priv)) { - i915_perf_load_test_config_cnl(dev_priv); - } else if (IS_COFFEELAKE(dev_priv)) { - if (IS_CFL_GT2(dev_priv)) - i915_perf_load_test_config_cflgt2(dev_priv); - if (IS_CFL_GT3(dev_priv)) - i915_perf_load_test_config_cflgt3(dev_priv); - } else if (IS_GEMINILAKE(dev_priv)) { - i915_perf_load_test_config_glk(dev_priv); - } else if (IS_KABYLAKE(dev_priv)) { - if (IS_KBL_GT2(dev_priv)) - i915_perf_load_test_config_kblgt2(dev_priv); - else if (IS_KBL_GT3(dev_priv)) - i915_perf_load_test_config_kblgt3(dev_priv); - } else if (IS_BROXTON(dev_priv)) { - i915_perf_load_test_config_bxt(dev_priv); - } else if (IS_SKYLAKE(dev_priv)) { - if (IS_SKL_GT2(dev_priv)) - i915_perf_load_test_config_sklgt2(dev_priv); - else if (IS_SKL_GT3(dev_priv)) - i915_perf_load_test_config_sklgt3(dev_priv); - else if (IS_SKL_GT4(dev_priv)) - i915_perf_load_test_config_sklgt4(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - i915_perf_load_test_config_chv(dev_priv); - } else if (IS_BROADWELL(dev_priv)) { - i915_perf_load_test_config_bdw(dev_priv); - } else if (IS_HASWELL(dev_priv)) { - i915_perf_load_test_config_hsw(dev_priv); -} - - if (dev_priv->perf.test_config.id == 0) + sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); + + if (INTEL_GEN(i915) >= 11) { + i915_perf_load_test_config_icl(i915); + } else if (IS_CANNONLAKE(i915)) { + i915_perf_load_test_config_cnl(i915); + } else if (IS_COFFEELAKE(i915)) { + if (IS_CFL_GT2(i915)) + i915_perf_load_test_config_cflgt2(i915); + if (IS_CFL_GT3(i915)) + i915_perf_load_test_config_cflgt3(i915); + } else if (IS_GEMINILAKE(i915)) { + i915_perf_load_test_config_glk(i915); + } else if (IS_KABYLAKE(i915)) { + if (IS_KBL_GT2(i915)) + i915_perf_load_test_config_kblgt2(i915); + else if (IS_KBL_GT3(i915)) + i915_perf_load_test_config_kblgt3(i915); + } else if (IS_BROXTON(i915)) { + i915_perf_load_test_config_bxt(i915); + } else if (IS_SKYLAKE(i915)) { + if (IS_SKL_GT2(i915)) + i915_perf_load_test_config_sklgt2(i915); + else if (IS_SKL_GT3(i915)) + i915_perf_load_test_config_sklgt3(i915); + else if (IS_SKL_GT4(i915)) + i915_perf_load_test_config_sklgt4(i915); + } else if (IS_CHERRYVIEW(i915)) { + i915_perf_load_test_config_chv(i915); + } else if (IS_BROADWELL(i915)) { + i915_perf_load_test_config_bdw(i915); + } else if (IS_HASWELL(i915)) { + i915_perf_load_test_config_hsw(i915); + } + + if (perf->test_config.id == 0) goto sysfs_error; - ret = sysfs_create_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + ret = sysfs_create_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); if (ret) goto sysfs_error; - atomic_set(&dev_priv->perf.test_config.ref_count, 1); + perf->test_config.perf = perf; + kref_init(&perf->test_config.ref); goto exit; sysfs_error: - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; exit: - mutex_unlock(&dev_priv->perf.lock); + mutex_unlock(&perf->lock); } /** * i915_perf_unregister - hide i915-perf from userspace - * @dev_priv: i915 device instance 
+ * @i915: i915 device instance * * i915-perf state cleanup is split up into an 'unregister' and * 'deinit' phase where the interface is first hidden from * userspace by i915_perf_unregister() before cleaning up * remaining state in i915_perf_fini(). */ -void i915_perf_unregister(struct drm_i915_private *dev_priv) +void i915_perf_unregister(struct drm_i915_private *i915) { - if (!dev_priv->perf.metrics_kobj) + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj) return; - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &dev_priv->perf.test_config.sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, + &perf->test_config.sysfs_metric); - kobject_put(dev_priv->perf.metrics_kobj); - dev_priv->perf.metrics_kobj = NULL; + kobject_put(perf->metrics_kobj); + perf->metrics_kobj = NULL; } -static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) { static const i915_reg_t flex_eu_regs[] = { EU_PERF_CNTL0, @@ -3143,7 +3515,7 @@ static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) return false; } -static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || @@ -3153,7 +3525,7 @@ static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr <= i915_mmio_reg_offset(OACEC7_1)); } -static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && @@ -3164,34 +3536,34 @@ static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); } -static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); } -static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen8_is_valid_mux_addr(dev_priv, addr) || + return gen8_is_valid_mux_addr(perf, addr) || addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); } -static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x25100 && addr <= 0x2FF90) || (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); } -static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) +static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return gen7_is_valid_mux_addr(dev_priv, addr) || + return gen7_is_valid_mux_addr(perf, addr) || (addr >= 0x182300 && addr <= 0x1823A4); } @@ -3214,8 +3586,8 @@ static u32 mask_reg_value(u32 reg, 
u32 val) return val; } -static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, - bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), +static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, + bool (*is_valid)(struct i915_perf *perf, u32 addr), u32 __user *regs, u32 n_regs) { @@ -3245,7 +3617,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, if (err) goto addr_err; - if (!is_valid(dev_priv, addr)) { + if (!is_valid(perf, addr)) { DRM_DEBUG("Invalid oa_reg address: %X\n", addr); err = -EINVAL; goto addr_err; @@ -3278,7 +3650,7 @@ static ssize_t show_dynamic_id(struct device *dev, return sprintf(buf, "%d\n", oa_config->id); } -static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, +static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf, struct i915_oa_config *oa_config) { sysfs_attr_init(&oa_config->sysfs_metric_id.attr); @@ -3293,7 +3665,7 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, oa_config->sysfs_metric.name = oa_config->uuid; oa_config->sysfs_metric.attrs = oa_config->attrs; - return sysfs_create_group(dev_priv->perf.metrics_kobj, + return sysfs_create_group(perf->metrics_kobj, &oa_config->sysfs_metric); } @@ -3313,17 +3685,18 @@ static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; struct drm_i915_perf_oa_config *args = data; struct i915_oa_config *oa_config, *tmp; + static struct i915_oa_reg *regs; int err, id; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } - if (!dev_priv->perf.metrics_kobj) { + if (!perf->metrics_kobj) { DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); return -EINVAL; } @@ -3346,7 +3719,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, return -ENOMEM; } - atomic_set(&oa_config->ref_count, 1); + oa_config->perf = perf; + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3360,59 +3734,59 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); oa_config->mux_regs_len = args->n_mux_regs; - oa_config->mux_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_mux_reg, - u64_to_user_ptr(args->mux_regs_ptr), - args->n_mux_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_mux_reg, + u64_to_user_ptr(args->mux_regs_ptr), + args->n_mux_regs); - if (IS_ERR(oa_config->mux_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for mux_regs\n"); - err = PTR_ERR(oa_config->mux_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->mux_regs = regs; oa_config->b_counter_regs_len = args->n_boolean_regs; - oa_config->b_counter_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_b_counter_reg, - u64_to_user_ptr(args->boolean_regs_ptr), - args->n_boolean_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_b_counter_reg, + u64_to_user_ptr(args->boolean_regs_ptr), + args->n_boolean_regs); - if (IS_ERR(oa_config->b_counter_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); - err = PTR_ERR(oa_config->b_counter_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->b_counter_regs = regs; - if 
(INTEL_GEN(dev_priv) < 8) { + if (INTEL_GEN(perf->i915) < 8) { if (args->n_flex_regs != 0) { err = -EINVAL; goto reg_err; } } else { oa_config->flex_regs_len = args->n_flex_regs; - oa_config->flex_regs = - alloc_oa_regs(dev_priv, - dev_priv->perf.ops.is_valid_flex_reg, - u64_to_user_ptr(args->flex_regs_ptr), - args->n_flex_regs); + regs = alloc_oa_regs(perf, + perf->ops.is_valid_flex_reg, + u64_to_user_ptr(args->flex_regs_ptr), + args->n_flex_regs); - if (IS_ERR(oa_config->flex_regs)) { + if (IS_ERR(regs)) { DRM_DEBUG("Failed to create OA config for flex_regs\n"); - err = PTR_ERR(oa_config->flex_regs); + err = PTR_ERR(regs); goto reg_err; } + oa_config->flex_regs = regs; } - err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + err = mutex_lock_interruptible(&perf->metrics_lock); if (err) goto reg_err; /* We shouldn't have too many configs, so this iteration shouldn't be * too costly. */ - idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) { + idr_for_each_entry(&perf->metrics_idr, tmp, id) { if (!strcmp(tmp->uuid, oa_config->uuid)) { DRM_DEBUG("OA config already exists with this uuid\n"); err = -EADDRINUSE; @@ -3420,14 +3794,14 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } } - err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config); + err = create_dynamic_oa_sysfs_entry(perf, oa_config); if (err) { DRM_DEBUG("Failed to create sysfs entry for OA config\n"); goto sysfs_err; } /* Config id 0 is invalid, id 1 for kernel stored test config. */ - oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr, + oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL); if (oa_config->id < 0) { @@ -3436,16 +3810,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, goto sysfs_err; } - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); return oa_config->id; sysfs_err: - mutex_unlock(&dev_priv->perf.metrics_lock); + mutex_unlock(&perf->metrics_lock); reg_err: - put_oa_config(dev_priv, oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3464,12 +3838,12 @@ reg_err: int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_perf *perf = &to_i915(dev)->perf; u64 *arg = data; struct i915_oa_config *oa_config; int ret; - if (!dev_priv->perf.initialized) { + if (!perf->i915) { DRM_DEBUG("i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3479,31 +3853,33 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, return -EACCES; } - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); + ret = mutex_lock_interruptible(&perf->metrics_lock); if (ret) - goto lock_err; + return ret; - oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg); + oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { DRM_DEBUG("Failed to remove unknown OA config\n"); ret = -ENOENT; - goto config_err; + goto err_unlock; } GEM_BUG_ON(*arg != oa_config->id); - sysfs_remove_group(dev_priv->perf.metrics_kobj, - &oa_config->sysfs_metric); + sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric); + + idr_remove(&perf->metrics_idr, *arg); - idr_remove(&dev_priv->perf.metrics_idr, *arg); + mutex_unlock(&perf->metrics_lock); DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(dev_priv, oa_config); + 
i915_oa_config_put(oa_config); + + return 0; -config_err: - mutex_unlock(&dev_priv->perf.metrics_lock); -lock_err: +err_unlock: + mutex_unlock(&perf->metrics_lock); return ret; } @@ -3551,103 +3927,103 @@ static struct ctl_table dev_root[] = { /** * i915_perf_init - initialize i915-perf state on module load - * @dev_priv: i915 device instance + * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. * * Note: i915-perf initialization is split into an 'init' and 'register' * phase with the i915_perf_register() exposing state to userspace. */ -void i915_perf_init(struct drm_i915_private *dev_priv) -{ - if (IS_HASWELL(dev_priv)) { - dev_priv->perf.ops.is_valid_b_counter_reg = - gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = - hsw_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = NULL; - dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set; - dev_priv->perf.ops.oa_enable = gen7_oa_enable; - dev_priv->perf.ops.oa_disable = gen7_oa_disable; - dev_priv->perf.ops.read = gen7_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = - gen7_oa_hw_tail_read; - - dev_priv->perf.oa_formats = hsw_oa_formats; - } else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { +void i915_perf_init(struct drm_i915_private *i915) +{ + struct i915_perf *perf = &i915->perf; + + /* XXX const struct i915_perf_ops! */ + + if (IS_HASWELL(i915)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = NULL; + perf->ops.enable_metric_set = hsw_enable_metric_set; + perf->ops.disable_metric_set = hsw_disable_metric_set; + perf->ops.oa_enable = gen7_oa_enable; + perf->ops.oa_disable = gen7_oa_disable; + perf->ops.read = gen7_oa_read; + perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + + perf->oa_formats = hsw_oa_formats; + } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) { /* Note: that although we could theoretically also support the * legacy ringbuffer mode on BDW (and earlier iterations of * this driver, before upstreaming did this) it didn't seem * worth the complexity to maintain now that BDW+ enable * execlist mode by default. 
*/ - dev_priv->perf.oa_formats = gen8_plus_oa_formats; + perf->oa_formats = gen8_plus_oa_formats; - dev_priv->perf.ops.oa_enable = gen8_oa_enable; - dev_priv->perf.ops.oa_disable = gen8_oa_disable; - dev_priv->perf.ops.read = gen8_oa_read; - dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; + perf->ops.read = gen8_oa_read; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; - if (IS_GEN_RANGE(dev_priv, 8, 9)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + if (IS_GEN_RANGE(i915, 8, 9)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen8_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->perf.ops.is_valid_mux_reg = + if (IS_CHERRYVIEW(i915)) { + perf->ops.is_valid_mux_reg = chv_is_valid_mux_addr; } - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen8_disable_metric_set; - if (IS_GEN(dev_priv, 8)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x120; - dev_priv->perf.ctx_flexeu0_offset = 0x2ce; + if (IS_GEN(i915, 8)) { + perf->ctx_oactxctrl_offset = 0x120; + perf->ctx_flexeu0_offset = 0x2ce; - dev_priv->perf.gen8_valid_ctx_bit = BIT(25); + perf->gen8_valid_ctx_bit = BIT(25); } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } - } else if (IS_GEN_RANGE(dev_priv, 10, 11)) { - dev_priv->perf.ops.is_valid_b_counter_reg = + } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; - dev_priv->perf.ops.is_valid_mux_reg = + perf->ops.is_valid_mux_reg = gen10_is_valid_mux_addr; - dev_priv->perf.ops.is_valid_flex_reg = + perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; - dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set; - dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.enable_metric_set = gen8_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; - if (IS_GEN(dev_priv, 10)) { - dev_priv->perf.ctx_oactxctrl_offset = 0x128; - dev_priv->perf.ctx_flexeu0_offset = 0x3de; + if (IS_GEN(i915, 10)) { + perf->ctx_oactxctrl_offset = 0x128; + perf->ctx_flexeu0_offset = 0x3de; } else { - dev_priv->perf.ctx_oactxctrl_offset = 0x124; - dev_priv->perf.ctx_flexeu0_offset = 0x78e; + perf->ctx_oactxctrl_offset = 0x124; + perf->ctx_flexeu0_offset = 0x78e; } - dev_priv->perf.gen8_valid_ctx_bit = BIT(16); + perf->gen8_valid_ctx_bit = BIT(16); } } - if (dev_priv->perf.ops.enable_metric_set) { - INIT_LIST_HEAD(&dev_priv->perf.streams); - mutex_init(&dev_priv->perf.lock); + if (perf->ops.enable_metric_set) { + mutex_init(&perf->lock); oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2); - dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); + (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); + perf->sysctl_header = register_sysctl_table(dev_root); - mutex_init(&dev_priv->perf.metrics_lock); - idr_init(&dev_priv->perf.metrics_idr); + mutex_init(&perf->metrics_lock); + 
idr_init(&perf->metrics_idr); /* We set up some ratelimit state to potentially throttle any * _NOTES about spurious, invalid OA reports which we don't @@ -3659,44 +4035,70 @@ void i915_perf_init(struct drm_i915_private *dev_priv) * * Using the same limiting factors as printk_ratelimit() */ - ratelimit_state_init(&dev_priv->perf.spurious_report_rs, - 5 * HZ, 10); + ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10); /* Since we use a DRM_NOTE for spurious reports it would be * inconsistent to let __ratelimit() automatically print a * warning for throttling. */ - ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, + ratelimit_set_flags(&perf->spurious_report_rs, RATELIMIT_MSG_ON_RELEASE); - dev_priv->perf.initialized = true; + atomic64_set(&perf->noa_programming_delay, + 500 * 1000 /* 500us */); + + perf->i915 = i915; } } static int destroy_config(int id, void *p, void *data) { - struct drm_i915_private *dev_priv = data; - struct i915_oa_config *oa_config = p; - - put_oa_config(dev_priv, oa_config); - + i915_oa_config_put(p); return 0; } /** * i915_perf_fini - Counter part to i915_perf_init() - * @dev_priv: i915 device instance + * @i915: i915 device instance */ -void i915_perf_fini(struct drm_i915_private *dev_priv) +void i915_perf_fini(struct drm_i915_private *i915) { - if (!dev_priv->perf.initialized) + struct i915_perf *perf = &i915->perf; + + if (!perf->i915) return; - idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv); - idr_destroy(&dev_priv->perf.metrics_idr); + idr_for_each(&perf->metrics_idr, destroy_config, perf); + idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(dev_priv->perf.sysctl_header); + unregister_sysctl_table(perf->sysctl_header); - memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops)); + memset(&perf->ops, 0, sizeof(perf->ops)); + perf->i915 = NULL; +} - dev_priv->perf.initialized = false; +/** + * i915_perf_ioctl_version - Version of the i915-perf subsystem + * + * This version number is used by userspace to detect available features. + */ +int i915_perf_ioctl_version(void) +{ + /* + * 1: Initial version + * I915_PERF_IOCTL_ENABLE + * I915_PERF_IOCTL_DISABLE + * + * 2: Added runtime modification of OA config. + * I915_PERF_IOCTL_CONFIG + * + * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold + * preemption on a particular context so that performance data is + * accessible from a delta of MI_RPC reports without looking at the + * OA buffer. 
+ */ + return 3; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_perf.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index a412b16d9ffc..4ceebce72060 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -6,11 +6,15 @@ #ifndef __I915_PERF_H__ #define __I915_PERF_H__ +#include <linux/kref.h> #include <linux/types.h> +#include "i915_perf_types.h" + struct drm_device; struct drm_file; struct drm_i915_private; +struct i915_oa_config; struct intel_context; struct intel_engine_cs; @@ -18,6 +22,7 @@ void i915_perf_init(struct drm_i915_private *i915); void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); +int i915_perf_ioctl_version(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); @@ -25,8 +30,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -void i915_oa_init_reg_state(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 *reg_state); + +void i915_oa_init_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); + +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set); + +static inline struct i915_oa_config * +i915_oa_config_get(struct i915_oa_config *oa_config) +{ + if (kref_get_unless_zero(&oa_config->ref)) + return oa_config; + else + return NULL; +} + +void i915_oa_config_release(struct kref *ref); +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, i915_oa_config_release); +} #endif /* __I915_PERF_H__ */ diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h new file mode 100644 index 000000000000..a1f733fc905a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -0,0 +1,406 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef _I915_PERF_TYPES_H_ +#define _I915_PERF_TYPES_H_ + +#include <linux/atomic.h> +#include <linux/device.h> +#include <linux/hrtimer.h> +#include <linux/llist.h> +#include <linux/poll.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/wait.h> + +#include "i915_reg.h" +#include "intel_wakeref.h" + +struct drm_i915_private; +struct file; +struct i915_gem_context; +struct i915_perf; +struct i915_vma; +struct intel_context; +struct intel_engine_cs; + +struct i915_oa_format { + u32 format; + int size; +}; + +struct i915_oa_reg { + i915_reg_t addr; + u32 value; +}; + +struct i915_oa_config { + struct i915_perf *perf; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct i915_oa_reg *mux_regs; + u32 mux_regs_len; + const struct i915_oa_reg *b_counter_regs; + u32 b_counter_regs_len; + const struct i915_oa_reg *flex_regs; + u32 flex_regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct device_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + +struct i915_perf_stream; + +/** + * struct i915_perf_stream_ops - the OPs to support a specific stream type + */ +struct i915_perf_stream_ops { + /** + * @enable: Enables the collection of HW samples, either in response to + * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is 
opened + * without `I915_PERF_FLAG_DISABLED`. + */ + void (*enable)(struct i915_perf_stream *stream); + + /** + * @disable: Disables the collection of HW samples, either in response + * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying + * the stream. + */ + void (*disable)(struct i915_perf_stream *stream); + + /** + * @poll_wait: Call poll_wait, passing a wait queue that will be woken + * once there is something ready to read() for the stream + */ + void (*poll_wait)(struct i915_perf_stream *stream, + struct file *file, + poll_table *wait); + + /** + * @wait_unlocked: For handling a blocking read, wait until there is + * something to ready to read() for the stream. E.g. wait on the same + * wait queue that would be passed to poll_wait(). + */ + int (*wait_unlocked)(struct i915_perf_stream *stream); + + /** + * @read: Copy buffered metrics as records to userspace + * **buf**: the userspace, destination buffer + * **count**: the number of bytes to copy, requested by userspace + * **offset**: zero at the start of the read, updated as the read + * proceeds, it represents how many bytes have been copied so far and + * the buffer offset for copying the next record. + * + * Copy as many buffered i915 perf samples and records for this stream + * to userspace as will fit in the given buffer. + * + * Only write complete records; returning -%ENOSPC if there isn't room + * for a complete record. + * + * Return any error condition that results in a short read such as + * -%ENOSPC or -%EFAULT, even though these may be squashed before + * returning to userspace. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @destroy: Cleanup any stream specific resources. + * + * The stream will always be disabled before this is called. + */ + void (*destroy)(struct i915_perf_stream *stream); +}; + +/** + * struct i915_perf_stream - state for a single open stream FD + */ +struct i915_perf_stream { + /** + * @perf: i915_perf backpointer + */ + struct i915_perf *perf; + + /** + * @uncore: mmio access path + */ + struct intel_uncore *uncore; + + /** + * @engine: Engine associated with this performance stream. + */ + struct intel_engine_cs *engine; + + /** + * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` + * properties given when opening a stream, representing the contents + * of a single sample as read() by userspace. + */ + u32 sample_flags; + + /** + * @sample_size: Considering the configured contents of a sample + * combined with the required header size, this is the total size + * of a single sample record. + */ + int sample_size; + + /** + * @ctx: %NULL if measuring system-wide across all contexts or a + * specific context that is being monitored. + */ + struct i915_gem_context *ctx; + + /** + * @enabled: Whether the stream is currently enabled, considering + * whether the stream was opened in a disabled state and based + * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. + */ + bool enabled; + + /** + * @hold_preemption: Whether preemption is put on hold for command + * submissions done on the @ctx. This is useful for some drivers that + * cannot easily post process the OA buffer context to subtract delta + * of performance counters not associated with @ctx. + */ + bool hold_preemption; + + /** + * @ops: The callbacks providing the implementation of this specific + * type of configured stream. 
+ */ + const struct i915_perf_stream_ops *ops; + + /** + * @oa_config: The OA configuration used by the stream. + */ + struct i915_oa_config *oa_config; + + /** + * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily + * each time @oa_config changes. + */ + struct llist_head oa_config_bos; + + /** + * @pinned_ctx: The OA context specific information. + */ + struct intel_context *pinned_ctx; + u32 specific_ctx_id; + u32 specific_ctx_id_mask; + + struct hrtimer poll_check_timer; + wait_queue_head_t poll_wq; + bool pollin; + + bool periodic; + int period_exponent; + + /** + * @oa_buffer: State of the OA buffer. + */ + struct { + struct i915_vma *vma; + u8 *vaddr; + u32 last_ctx_id; + int format; + int format_size; + int size_exponent; + + /** + * @ptr_lock: Locks reads and writes to all head/tail state + * + * Consider: the head and tail pointer state needs to be read + * consistently from a hrtimer callback (atomic context) and + * read() fop (user context) with tail pointer updates happening + * in atomic context and head updates in user context and the + * (unlikely) possibility of read() errors needing to reset all + * head/tail state. + * + * Note: Contention/performance aren't currently a significant + * concern here considering the relatively low frequency of + * hrtimer callbacks (5ms period) and that reads typically only + * happen in response to a hrtimer event and likely complete + * before the next callback. + * + * Note: This lock is not held *while* reading and copying data + * to userspace so the value of head observed in hrtimer + * callbacks won't represent any partial consumption of data. + */ + spinlock_t ptr_lock; + + /** + * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to + * be used for reading. + * + * Initial values of 0xffffffff are invalid and imply that an + * update is required (and should be ignored by an attempted + * read) + */ + struct { + u32 offset; + } tails[2]; + + /** + * @aged_tail_idx: Index for the aged tail ready to read() data up to. + */ + unsigned int aged_tail_idx; + + /** + * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer + * was read; used to determine when it is old enough to trust. + */ + u64 aging_timestamp; + + /** + * @head: Although we can always read back the head pointer register, + * we prefer to avoid trusting the HW state, just to avoid any + * risk that some hardware condition could somehow bump the + * head pointer unpredictably and cause us to forward the wrong + * OA buffer data to userspace. + */ + u32 head; + } oa_buffer; + + /** + * A batch buffer doing a wait on the GPU for the NOA logic to be + * reprogrammed. + */ + struct i915_vma *noa_wait; +}; + +/** + * struct i915_oa_ops - Gen specific implementation of an OA unit stream + */ +struct i915_oa_ops { + /** + * @is_valid_b_counter_reg: Validates register's address for + * programming boolean counters for a particular platform. + */ + bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_mux_reg: Validates register's address for programming mux + * for a particular platform. + */ + bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); + + /** + * @is_valid_flex_reg: Validates register's address for programming + * flex EU filtering for a particular platform. 
+ */ + bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); + + /** + * @enable_metric_set: Selects and applies any MUX configuration to set + * up the Boolean and Custom (B/C) counters that are part of the + * counter reports being sampled. May apply system constraints such as + * disabling EU clock gating as required. + */ + int (*enable_metric_set)(struct i915_perf_stream *stream); + + /** + * @disable_metric_set: Remove system constraints associated with using + * the OA unit. + */ + void (*disable_metric_set)(struct i915_perf_stream *stream); + + /** + * @oa_enable: Enable periodic sampling + */ + void (*oa_enable)(struct i915_perf_stream *stream); + + /** + * @oa_disable: Disable periodic sampling + */ + void (*oa_disable)(struct i915_perf_stream *stream); + + /** + * @read: Copy data from the circular OA buffer into a given userspace + * buffer. + */ + int (*read)(struct i915_perf_stream *stream, + char __user *buf, + size_t count, + size_t *offset); + + /** + * @oa_hw_tail_read: read the OA tail pointer register + * + * In particular this enables us to share all the fiddly code for + * handling the OA unit tail pointer race that affects multiple + * generations. + */ + u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); +}; + +struct i915_perf { + struct drm_i915_private *i915; + + struct kobject *metrics_kobj; + struct ctl_table_header *sysctl_header; + + /* + * Lock associated with adding/modifying/removing OA configs + * in perf->metrics_idr. + */ + struct mutex metrics_lock; + + /* + * List of dynamic configurations (struct i915_oa_config), you + * need to hold perf->metrics_lock to access it. + */ + struct idr metrics_idr; + + /* + * Lock associated with anything below within this structure + * except exclusive_stream. + */ + struct mutex lock; + + /* + * The stream currently using the OA unit. If accessed + * outside a syscall associated to its file + * descriptor. + */ + struct i915_perf_stream *exclusive_stream; + + /** + * For rate limiting any notifications of spurious + * invalid OA reports + */ + struct ratelimit_state spurious_report_rs; + + struct i915_oa_config test_config; + + u32 gen7_latched_oastatus1; + u32 ctx_oactxctrl_offset; + u32 ctx_flexeu0_offset; + + /** + * The RPT_ID/reason field for Gen8+ includes a bit + * to determine if the CTX ID in the report is valid + * but the specific bit differs between Gen 8 and 9 + */ + u32 gen8_valid_ctx_bit; + + struct i915_oa_ops ops; + const struct i915_oa_format *oa_formats; + + atomic64_t noa_programming_delay; +}; + +#endif /* _I915_PERF_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 8e251e719390..85912917c062 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -11,6 +11,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_drv.h" #include "i915_pmu.h" @@ -116,22 +117,124 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) return enable; } -void i915_pmu_gt_parked(struct drm_i915_private *i915) +static u64 __get_rc6(struct intel_gt *gt) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = gt->i915; + u64 val; - if (!pmu->base.event_init) - return; + val = intel_rc6_residency_ns(>->rc6, + IS_VALLEYVIEW(i915) ? 
+ VLV_GT_RENDER_RC6 : + GEN6_GT_GFX_RC6); + + if (HAS_RC6p(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6p); + + if (HAS_RC6pp(i915)) + val += intel_rc6_residency_ns(>->rc6, GEN6_GT_GFX_RC6pp); + + return val; +} + +#if IS_ENABLED(CONFIG_PM) + +static inline s64 ktime_since(const ktime_t kt) +{ + return ktime_to_ns(ktime_sub(ktime_get(), kt)); +} + +static u64 __pmu_estimate_rc6(struct i915_pmu *pmu) +{ + u64 val; - spin_lock_irq(&pmu->lock); /* - * Signal sampling timer to stop if only engine events are enabled and - * GPU went idle. + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. */ - pmu->timer_enabled = pmu_needs_timer(pmu, false); - spin_unlock_irq(&pmu->lock); + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + + return val; +} + +static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val) +{ + /* + * If we are coming back from being runtime suspended we must + * be careful not to report a larger value than returned + * previously. + */ + if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } + + return val; } +static u64 get_rc6(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct i915_pmu *pmu = &i915->pmu; + unsigned long flags; + u64 val; + + val = 0; + if (intel_gt_pm_get_if_awake(gt)) { + val = __get_rc6(gt); + intel_gt_pm_put(gt); + } + + spin_lock_irqsave(&pmu->lock, flags); + + if (val) + val = __pmu_update_rc6(pmu, val); + else + val = __pmu_estimate_rc6(pmu); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return val; +} + +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_update_rc6(pmu, __get_rc6(&i915->gt)); + + pmu->sleep_last = ktime_get(); +} + +static void unpark_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + /* Estimate how long we slept and accumulate that into rc6 counters */ + if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + __pmu_estimate_rc6(pmu); +} + +#else + +static u64 get_rc6(struct intel_gt *gt) +{ + return __get_rc6(gt); +} + +static void park_rc6(struct drm_i915_private *i915) {} +static void unpark_rc6(struct drm_i915_private *i915) {} + +#endif + static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) { if (!pmu->timer_enabled && pmu_needs_timer(pmu, true)) { @@ -143,6 +246,26 @@ static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) } } +void i915_pmu_gt_parked(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + if (!pmu->base.event_init) + return; + + spin_lock_irq(&pmu->lock); + + park_rc6(i915); + + /* + * Signal sampling timer to stop if only engine events are enabled and + * GPU went idle. + */ + pmu->timer_enabled = pmu_needs_timer(pmu, false); + + spin_unlock_irq(&pmu->lock); +} + void i915_pmu_gt_unparked(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; @@ -151,10 +274,14 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915) return; spin_lock_irq(&pmu->lock); + /* * Re-enable sampling timer when GPU goes active. 
*/ __i915_pmu_maybe_start_timer(pmu); + + unpark_rc6(i915); + spin_unlock_irq(&pmu->lock); } @@ -174,7 +301,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt, id) { struct intel_engine_pmu *pmu = &engine->pmu; unsigned long flags; bool busy; @@ -194,6 +321,10 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if (val & RING_WAIT_SEMAPHORE) add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns); + /* No need to sample when busy stats are supported. */ + if (intel_engine_supports_stats(engine)) + goto skip; + /* * While waiting on a semaphore or event, MI_MODE reports the * ring as idle. However, previously using the seqno, and with @@ -426,104 +557,6 @@ static int i915_pmu_event_init(struct perf_event *event) return 0; } -static u64 __get_rc6(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - u64 val; - - val = intel_rc6_residency_ns(i915, - IS_VALLEYVIEW(i915) ? - VLV_GT_RENDER_RC6 : - GEN6_GT_GFX_RC6); - - if (HAS_RC6p(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p); - - if (HAS_RC6pp(i915)) - val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp); - - return val; -} - -static u64 get_rc6(struct intel_gt *gt) -{ -#if IS_ENABLED(CONFIG_PM) - struct drm_i915_private *i915 = gt->i915; - struct intel_runtime_pm *rpm = &i915->runtime_pm; - struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; - unsigned long flags; - u64 val; - - wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (wakeref) { - val = __get_rc6(gt); - intel_runtime_pm_put(rpm, wakeref); - - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. - */ - - spin_lock_irqsave(&pmu->lock, flags); - - if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } else { - struct device *kdev = rpm->kdev; - - /* - * We are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - spin_lock_irqsave(&pmu->lock, flags); - - /* - * After the above branch intel_runtime_pm_get_if_in_use failed - * to get the runtime PM reference we cannot assume we are in - * runtime suspend since we can either: a) race with coming out - * of it before we took the power.lock, or b) there are other - * states than suspended which can bring us here. - * - * We need to double-check that we are indeed currently runtime - * suspended and if not we cannot do better than report the last - * known RC6 value. 
- */ - if (pm_runtime_status_suspended(kdev)) { - val = pm_runtime_suspended_time(kdev); - - if (!pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - pmu->suspended_time_last = val; - - val -= pmu->suspended_time_last; - val += pmu->sample[__I915_SAMPLE_RC6].cur; - - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - } else if (pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } else { - val = pmu->sample[__I915_SAMPLE_RC6].cur; - } - - spin_unlock_irqrestore(&pmu->lock, flags); - } - - return val; -#else - return __get_rc6(gt); -#endif -} - static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = @@ -1047,10 +1080,21 @@ static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) cpuhp_remove_multi_state(cpuhp_slot); } +static bool is_igp(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + /* IGP is 0000:00:02.0 */ + return pci_domain_nr(pdev->bus) == 0 && + pdev->bus->number == 0 && + PCI_SLOT(pdev->devfn) == 2 && + PCI_FUNC(pdev->devfn) == 0; +} + void i915_pmu_register(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - int ret; + int ret = -ENOMEM; if (INTEL_GEN(i915) <= 2) { dev_info(i915->drm.dev, "PMU not supported for this GPU."); @@ -1058,10 +1102,8 @@ void i915_pmu_register(struct drm_i915_private *i915) } i915_pmu_events_attr_group.attrs = create_event_attributes(pmu); - if (!i915_pmu_events_attr_group.attrs) { - ret = -ENOMEM; + if (!i915_pmu_events_attr_group.attrs) goto err; - } pmu->base.attr_groups = i915_pmu_attr_groups; pmu->base.task_ctx_nr = perf_invalid_context; @@ -1077,10 +1119,19 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; - ret = perf_pmu_register(&pmu->base, "i915", -1); - if (ret) + if (!is_igp(i915)) + pmu->name = kasprintf(GFP_KERNEL, + "i915-%s", + dev_name(i915->drm.dev)); + else + pmu->name = "i915"; + if (!pmu->name) goto err; + ret = perf_pmu_register(&pmu->base, pmu->name, -1); + if (ret) + goto err_name; + ret = i915_pmu_register_cpuhp_state(pmu); if (ret) goto err_unreg; @@ -1089,6 +1140,9 @@ void i915_pmu_register(struct drm_i915_private *i915) err_unreg: perf_pmu_unregister(&pmu->base); +err_name: + if (!is_igp(i915)) + kfree(pmu->name); err: pmu->base.event_init = NULL; free_event_attributes(pmu); @@ -1110,5 +1164,7 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&pmu->base); pmu->base.event_init = NULL; + if (!is_igp(i915)) + kfree(pmu->name); free_event_attributes(pmu); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 4fc4f2478301..bf52e3983631 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -47,6 +47,10 @@ struct i915_pmu { */ struct pmu base; /** + * @name: Name as registered with perf core. + */ + const char *name; + /** * @lock: Lock protecting enable mask and ref count handling. */ spinlock_t lock; @@ -97,9 +101,9 @@ struct i915_pmu { */ struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; /** - * @suspended_time_last: Cached suspend time from PM core. + * @sleep_last: Last time GT parked for RC6 estimation. */ - u64 suspended_time_last; + ktime_t sleep_last; /** * @i915_attr: Memory block holding device attributes. 
*/ diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index ad9240a0817a..c27cfef9281c 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -7,6 +7,7 @@ #include <linux/nospec.h> #include "i915_drv.h" +#include "i915_perf.h" #include "i915_query.h" #include <uapi/drm/i915_drm.h> @@ -37,8 +38,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv, const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; struct drm_i915_query_topology_info topo; u32 slice_length, subslice_length, eu_length, total_length; - u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices); - u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); int ret; if (query_item->flags != 0) @@ -50,8 +49,8 @@ static int query_topology_info(struct drm_i915_private *dev_priv, BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask)); slice_length = sizeof(sseu->slice_mask); - subslice_length = sseu->max_slices * subslice_stride; - eu_length = sseu->max_slices * sseu->max_subslices * eu_stride; + subslice_length = sseu->max_slices * sseu->ss_stride; + eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride; total_length = sizeof(topo) + slice_length + subslice_length + eu_length; @@ -69,9 +68,9 @@ static int query_topology_info(struct drm_i915_private *dev_priv, topo.max_eus_per_subslice = sseu->max_eus_per_subslice; topo.subslice_offset = slice_length; - topo.subslice_stride = subslice_stride; + topo.subslice_stride = sseu->ss_stride; topo.eu_offset = slice_length + subslice_length; - topo.eu_stride = eu_stride; + topo.eu_stride = sseu->eu_stride; if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr), &topo, sizeof(topo))) @@ -142,10 +141,305 @@ query_engine_info(struct drm_i915_private *i915, return len; } +static int can_copy_perf_config_registers_or_number(u32 user_n_regs, + u64 user_regs_ptr, + u32 kernel_n_regs) +{ + /* + * We'll just put the number of registers, and won't copy the + * register. 
+ */ + if (user_n_regs == 0) + return 0; + + if (user_n_regs < kernel_n_regs) + return -EINVAL; + + if (!access_ok(u64_to_user_ptr(user_regs_ptr), + 2 * sizeof(u32) * kernel_n_regs)) + return -EFAULT; + + return 0; +} + +static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs, + u32 kernel_n_regs, + u64 user_regs_ptr, + u32 *user_n_regs) +{ + u32 r; + + if (*user_n_regs == 0) { + *user_n_regs = kernel_n_regs; + return 0; + } + + *user_n_regs = kernel_n_regs; + + for (r = 0; r < kernel_n_regs; r++) { + u32 __user *user_reg_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2); + u32 __user *user_val_ptr = + u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 + + sizeof(u32)); + int ret; + + ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr), + user_reg_ptr); + if (ret) + return -EFAULT; + + ret = __put_user(kernel_regs[r].value, user_val_ptr); + if (ret) + return -EFAULT; + } + + return 0; +} + +static int query_perf_config_data(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item, + bool use_uuid) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_perf_oa_config __user *user_config_ptr = + u64_to_user_ptr(query_item->data_ptr + + sizeof(struct drm_i915_query_perf_config)); + struct drm_i915_perf_oa_config user_config; + struct i915_perf *perf = &i915->perf; + struct i915_oa_config *oa_config; + char uuid[UUID_STRING_LEN + 1]; + u64 config_id; + u32 flags, total_size; + int ret; + + if (!perf->i915) + return -ENODEV; + + total_size = + sizeof(struct drm_i915_query_perf_config) + + sizeof(struct drm_i915_perf_oa_config); + + if (query_item->length == 0) + return total_size; + + if (query_item->length < total_size) { + DRM_DEBUG("Invalid query config data item size=%u expected=%u\n", + query_item->length, total_size); + return -EINVAL; + } + + if (!access_ok(user_query_config_ptr, total_size)) + return -EFAULT; + + if (__get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + if (use_uuid) { + struct i915_oa_config *tmp; + int id; + + BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid)); + + memset(&uuid, 0, sizeof(uuid)); + if (__copy_from_user(uuid, user_query_config_ptr->uuid, + sizeof(user_query_config_ptr->uuid))) + return -EFAULT; + + oa_config = NULL; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, uuid)) { + oa_config = i915_oa_config_get(tmp); + break; + } + } + rcu_read_unlock(); + } else { + if (__get_user(config_id, &user_query_config_ptr->config)) + return -EFAULT; + + oa_config = i915_perf_get_oa_config(perf, config_id); + } + if (!oa_config) + return -ENOENT; + + if (__copy_from_user(&user_config, user_config_ptr, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs, + user_config.boolean_regs_ptr, + oa_config->b_counter_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs, + user_config.flex_regs_ptr, + oa_config->flex_regs_len); + if (ret) + goto out; + + ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs, + user_config.mux_regs_ptr, + oa_config->mux_regs_len); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs, + oa_config->b_counter_regs_len, + user_config.boolean_regs_ptr, + &user_config.n_boolean_regs); + if (ret) + goto 
out; + + ret = copy_perf_config_registers_or_number(oa_config->flex_regs, + oa_config->flex_regs_len, + user_config.flex_regs_ptr, + &user_config.n_flex_regs); + if (ret) + goto out; + + ret = copy_perf_config_registers_or_number(oa_config->mux_regs, + oa_config->mux_regs_len, + user_config.mux_regs_ptr, + &user_config.n_mux_regs); + if (ret) + goto out; + + memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid)); + + if (__copy_to_user(user_config_ptr, &user_config, + sizeof(user_config))) { + ret = -EFAULT; + goto out; + } + + ret = total_size; + +out: + i915_oa_config_put(oa_config); + return ret; +} + +static size_t sizeof_perf_config_list(size_t count) +{ + return sizeof(struct drm_i915_query_perf_config) + sizeof(u64) * count; +} + +static size_t sizeof_perf_metrics(struct i915_perf *perf) +{ + struct i915_oa_config *tmp; + size_t i; + int id; + + i = 1; + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) + i++; + rcu_read_unlock(); + + return sizeof_perf_config_list(i); +} + +static int query_perf_config_list(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_perf_config __user *user_query_config_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct i915_perf *perf = &i915->perf; + u64 *oa_config_ids = NULL; + int alloc, n_configs; + u32 flags; + int ret; + + if (!perf->i915) + return -ENODEV; + + if (query_item->length == 0) + return sizeof_perf_metrics(perf); + + if (get_user(flags, &user_query_config_ptr->flags)) + return -EFAULT; + + if (flags != 0) + return -EINVAL; + + n_configs = 1; + do { + struct i915_oa_config *tmp; + u64 *ids; + int id; + + ids = krealloc(oa_config_ids, + n_configs * sizeof(*oa_config_ids), + GFP_KERNEL); + if (!ids) + return -ENOMEM; + + alloc = fetch_and_zero(&n_configs); + + ids[n_configs++] = 1ull; /* reserved for test_config */ + rcu_read_lock(); + idr_for_each_entry(&perf->metrics_idr, tmp, id) { + if (n_configs < alloc) + ids[n_configs] = id; + n_configs++; + } + rcu_read_unlock(); + + oa_config_ids = ids; + } while (n_configs > alloc); + + if (query_item->length < sizeof_perf_config_list(n_configs)) { + DRM_DEBUG("Invalid query config list item size=%u expected=%zu\n", + query_item->length, + sizeof_perf_config_list(n_configs)); + kfree(oa_config_ids); + return -EINVAL; + } + + if (put_user(n_configs, &user_query_config_ptr->config)) { + kfree(oa_config_ids); + return -EFAULT; + } + + ret = copy_to_user(user_query_config_ptr + 1, + oa_config_ids, + n_configs * sizeof(*oa_config_ids)); + kfree(oa_config_ids); + if (ret) + return -EFAULT; + + return sizeof_perf_config_list(n_configs); +} + +static int query_perf_config(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + switch (query_item->flags) { + case DRM_I915_QUERY_PERF_CONFIG_LIST: + return query_perf_config_list(i915, query_item); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID: + return query_perf_config_data(i915, query_item, true); + case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID: + return query_perf_config_data(i915, query_item, false); + default: + return -EINVAL; + } +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, query_engine_info, + query_perf_config, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2abd199093c5..855db888516c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h 
+++ b/drivers/gpu/drm/i915/i915_reg.h @@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) #define MI_PREDICATE_SRC1_UDW _MMIO(0x2408 + 4) - +#define MI_PREDICATE_DATA _MMIO(0x2410) +#define MI_PREDICATE_RESULT _MMIO(0x2418) +#define MI_PREDICATE_RESULT_1 _MMIO(0x241c) #define MI_PREDICATE_RESULT_2 _MMIO(0x2214) #define LOWER_SLICE_ENABLED (1 << 0) #define LOWER_SLICE_DISABLED (0 << 0) @@ -1956,8 +1958,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ICL_DPHY_CHKN(port) _MMIO(_ICL_COMBOPHY(port) + _ICL_DPHY_CHKN_REG) #define ICL_DPHY_CHKN_AFE_OVER_PPI_STRAP REG_BIT(7) -#define MG_PHY_PORT_LN(ln, port, ln0p1, ln0p2, ln1p1) \ - _MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) +#define MG_PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \ + _MMIO(_PORT(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) #define MG_TX_LINK_PARAMS_TX1LN0_PORT1 0x16812C #define MG_TX_LINK_PARAMS_TX1LN1_PORT1 0x16852C @@ -1967,10 +1969,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_LINK_PARAMS_TX1LN1_PORT3 0x16A52C #define MG_TX_LINK_PARAMS_TX1LN0_PORT4 0x16B12C #define MG_TX_LINK_PARAMS_TX1LN1_PORT4 0x16B52C -#define MG_TX1_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX1LN1_PORT1) +#define MG_TX1_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX1LN1_PORT1) #define MG_TX_LINK_PARAMS_TX2LN0_PORT1 0x1680AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT1 0x1684AC @@ -1980,10 +1982,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_LINK_PARAMS_TX2LN1_PORT3 0x16A4AC #define MG_TX_LINK_PARAMS_TX2LN0_PORT4 0x16B0AC #define MG_TX_LINK_PARAMS_TX2LN1_PORT4 0x16B4AC -#define MG_TX2_LINK_PARAMS(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ - MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ - MG_TX_LINK_PARAMS_TX2LN1_PORT1) +#define MG_TX2_LINK_PARAMS(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ + MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ + MG_TX_LINK_PARAMS_TX2LN1_PORT1) #define CRI_USE_FS32 (1 << 5) #define MG_TX_PISO_READLOAD_TX1LN0_PORT1 0x16814C @@ -1994,10 +1996,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_PISO_READLOAD_TX1LN1_PORT3 0x16A54C #define MG_TX_PISO_READLOAD_TX1LN0_PORT4 0x16B14C #define MG_TX_PISO_READLOAD_TX1LN1_PORT4 0x16B54C -#define MG_TX1_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX1LN1_PORT1) +#define MG_TX1_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX1LN1_PORT1) #define MG_TX_PISO_READLOAD_TX2LN0_PORT1 0x1680CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT1 0x1684CC @@ -2007,10 +2009,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_PISO_READLOAD_TX2LN1_PORT3 0x16A4CC #define MG_TX_PISO_READLOAD_TX2LN0_PORT4 0x16B0CC #define MG_TX_PISO_READLOAD_TX2LN1_PORT4 0x16B4CC -#define MG_TX2_PISO_READLOAD(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ - MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ - MG_TX_PISO_READLOAD_TX2LN1_PORT1) +#define MG_TX2_PISO_READLOAD(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, 
MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ + MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ + MG_TX_PISO_READLOAD_TX2LN1_PORT1) #define CRI_CALCINIT (1 << 1) #define MG_TX_SWINGCTRL_TX1LN0_PORT1 0x168148 @@ -2021,10 +2023,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_SWINGCTRL_TX1LN1_PORT3 0x16A548 #define MG_TX_SWINGCTRL_TX1LN0_PORT4 0x16B148 #define MG_TX_SWINGCTRL_TX1LN1_PORT4 0x16B548 -#define MG_TX1_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ - MG_TX_SWINGCTRL_TX1LN0_PORT2, \ - MG_TX_SWINGCTRL_TX1LN1_PORT1) +#define MG_TX1_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX1LN0_PORT1, \ + MG_TX_SWINGCTRL_TX1LN0_PORT2, \ + MG_TX_SWINGCTRL_TX1LN1_PORT1) #define MG_TX_SWINGCTRL_TX2LN0_PORT1 0x1680C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT1 0x1684C8 @@ -2034,10 +2036,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_SWINGCTRL_TX2LN1_PORT3 0x16A4C8 #define MG_TX_SWINGCTRL_TX2LN0_PORT4 0x16B0C8 #define MG_TX_SWINGCTRL_TX2LN1_PORT4 0x16B4C8 -#define MG_TX2_SWINGCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ - MG_TX_SWINGCTRL_TX2LN0_PORT2, \ - MG_TX_SWINGCTRL_TX2LN1_PORT1) +#define MG_TX2_SWINGCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_SWINGCTRL_TX2LN0_PORT1, \ + MG_TX_SWINGCTRL_TX2LN0_PORT2, \ + MG_TX_SWINGCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_17_12(x) ((x) << 0) #define CRI_TXDEEMPH_OVERRIDE_17_12_MASK (0x3F << 0) @@ -2049,10 +2051,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DRVCTRL_TX1LN1_TXPORT3 0x16A544 #define MG_TX_DRVCTRL_TX1LN0_TXPORT4 0x16B144 #define MG_TX_DRVCTRL_TX1LN1_TXPORT4 0x16B544 -#define MG_TX1_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ - MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ - MG_TX_DRVCTRL_TX1LN1_TXPORT1) +#define MG_TX1_DRVCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX1LN0_TXPORT1, \ + MG_TX_DRVCTRL_TX1LN0_TXPORT2, \ + MG_TX_DRVCTRL_TX1LN1_TXPORT1) #define MG_TX_DRVCTRL_TX2LN0_PORT1 0x1680C4 #define MG_TX_DRVCTRL_TX2LN1_PORT1 0x1684C4 @@ -2062,10 +2064,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DRVCTRL_TX2LN1_PORT3 0x16A4C4 #define MG_TX_DRVCTRL_TX2LN0_PORT4 0x16B0C4 #define MG_TX_DRVCTRL_TX2LN1_PORT4 0x16B4C4 -#define MG_TX2_DRVCTRL(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ - MG_TX_DRVCTRL_TX2LN0_PORT2, \ - MG_TX_DRVCTRL_TX2LN1_PORT1) +#define MG_TX2_DRVCTRL(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DRVCTRL_TX2LN0_PORT1, \ + MG_TX_DRVCTRL_TX2LN0_PORT2, \ + MG_TX_DRVCTRL_TX2LN1_PORT1) #define CRI_TXDEEMPH_OVERRIDE_11_6(x) ((x) << 24) #define CRI_TXDEEMPH_OVERRIDE_11_6_MASK (0x3F << 24) #define CRI_TXDEEMPH_OVERRIDE_EN (1 << 22) @@ -2082,10 +2084,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_CLKHUB_LN1_PORT3 0x16A79C #define MG_CLKHUB_LN0_PORT4 0x16B39C #define MG_CLKHUB_LN1_PORT4 0x16B79C -#define MG_CLKHUB(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_CLKHUB_LN0_PORT1, \ - MG_CLKHUB_LN0_PORT2, \ - MG_CLKHUB_LN1_PORT1) +#define MG_CLKHUB(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_CLKHUB_LN0_PORT1, \ + MG_CLKHUB_LN0_PORT2, \ + MG_CLKHUB_LN1_PORT1) #define CFG_LOW_RATE_LKREN_EN (1 << 11) #define MG_TX_DCC_TX1LN0_PORT1 0x168110 @@ -2096,10 +2098,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DCC_TX1LN1_PORT3 0x16A510 #define MG_TX_DCC_TX1LN0_PORT4 0x16B110 #define MG_TX_DCC_TX1LN1_PORT4 0x16B510 -#define MG_TX1_DCC(ln, port) \ - 
MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX1LN0_PORT1, \ - MG_TX_DCC_TX1LN0_PORT2, \ - MG_TX_DCC_TX1LN1_PORT1) +#define MG_TX1_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX1LN0_PORT1, \ + MG_TX_DCC_TX1LN0_PORT2, \ + MG_TX_DCC_TX1LN1_PORT1) #define MG_TX_DCC_TX2LN0_PORT1 0x168090 #define MG_TX_DCC_TX2LN1_PORT1 0x168490 #define MG_TX_DCC_TX2LN0_PORT2 0x169090 @@ -2108,10 +2110,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_TX_DCC_TX2LN1_PORT3 0x16A490 #define MG_TX_DCC_TX2LN0_PORT4 0x16B090 #define MG_TX_DCC_TX2LN1_PORT4 0x16B490 -#define MG_TX2_DCC(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_TX_DCC_TX2LN0_PORT1, \ - MG_TX_DCC_TX2LN0_PORT2, \ - MG_TX_DCC_TX2LN1_PORT1) +#define MG_TX2_DCC(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_TX_DCC_TX2LN0_PORT1, \ + MG_TX_DCC_TX2LN0_PORT2, \ + MG_TX_DCC_TX2LN1_PORT1) #define CFG_AMI_CK_DIV_OVERRIDE_VAL(x) ((x) << 25) #define CFG_AMI_CK_DIV_OVERRIDE_VAL_MASK (0x3 << 25) #define CFG_AMI_CK_DIV_OVERRIDE_EN (1 << 24) @@ -2124,10 +2126,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MG_DP_MODE_LN1_ACU_PORT3 0x16A7A0 #define MG_DP_MODE_LN0_ACU_PORT4 0x16B3A0 #define MG_DP_MODE_LN1_ACU_PORT4 0x16B7A0 -#define MG_DP_MODE(ln, port) \ - MG_PHY_PORT_LN(ln, port, MG_DP_MODE_LN0_ACU_PORT1, \ - MG_DP_MODE_LN0_ACU_PORT2, \ - MG_DP_MODE_LN1_ACU_PORT1) +#define MG_DP_MODE(ln, tc_port) \ + MG_PHY_PORT_LN(ln, tc_port, MG_DP_MODE_LN0_ACU_PORT1, \ + MG_DP_MODE_LN0_ACU_PORT2, \ + MG_DP_MODE_LN1_ACU_PORT1) #define MG_DP_MODE_CFG_DP_X2_MODE (1 << 7) #define MG_DP_MODE_CFG_DP_X1_MODE (1 << 6) #define MG_DP_MODE_CFG_TR2PWR_GATING (1 << 5) @@ -2166,13 +2168,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_FIA(fia, off) _MMIO(_FIA(fia) + (off)) /* ICL PHY DFLEX registers */ -#define PORT_TX_DFLEXDPMLE1(fia) _MMIO_FIA((fia), 0x008C0) -#define DFLEXDPMLE1_DPMLETC_MASK(tc_port) (0xf << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML0(tc_port) (1 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML1_0(tc_port) (3 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3(tc_port) (8 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_2(tc_port) (12 << (4 * (tc_port))) -#define DFLEXDPMLE1_DPMLETC_ML3_0(tc_port) (15 << (4 * (tc_port))) +#define PORT_TX_DFLEXDPMLE1(fia) _MMIO_FIA((fia), 0x008C0) +#define DFLEXDPMLE1_DPMLETC_MASK(idx) (0xf << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML0(idx) (1 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML1_0(idx) (3 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3(idx) (8 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_2(idx) (12 << (4 * (idx))) +#define DFLEXDPMLE1_DPMLETC_ML3_0(idx) (15 << (4 * (idx))) /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C @@ -2483,6 +2485,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ +/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ +#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) +#define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4) + #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4) #define RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */ #define RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28) @@ -2705,6 +2711,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VLV_GU_CTL0 _MMIO(VLV_DISPLAY_BASE + 0x2030) #define VLV_GU_CTL1 _MMIO(VLV_DISPLAY_BASE + 0x2034) #define SCPD0 _MMIO(0x209c) /* 915+ only */ 
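A note on the MG_PHY_PORT_LN() rename running through the hunks above: callers now pass the 0-based Type-C port index directly instead of a DDI port that the macro used to bias by PORT_C, so the addresses it computes do not change. Below is a minimal standalone sketch of that arithmetic (not driver code), assuming _PORT()/_PICK_EVEN() keep their usual a + index * (b - a) form; the MG_DP_MODE lane-0 PORT1/PORT2 anchor values are taken from i915_reg.h since not every instance is visible in this hunk, and the asserts check against the PORT3/PORT4 instances that are listed above.

/* Sketch of MG_PHY_PORT_LN() addressing with a 0-based tc_port index. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PICK_EVEN(idx, a, b)	((a) + (idx) * ((b) - (a)))
#define PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \
	(PICK_EVEN(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1)))

int main(void)
{
	/* MG_DP_MODE anchors (assumed from i915_reg.h): LN0/PORT1, LN0/PORT2, LN1/PORT1 */
	const uint32_t ln0p1 = 0x1683A0, ln0p2 = 0x1693A0, ln1p1 = 0x1687A0;

	/* These match the per-port instances spelled out in the hunk above */
	assert(PHY_PORT_LN(1, 2, ln0p1, ln0p2, ln1p1) == 0x16A7A0); /* LN1, PORT3 */
	assert(PHY_PORT_LN(0, 3, ln0p1, ln0p2, ln1p1) == 0x16B3A0); /* LN0, PORT4 */
	assert(PHY_PORT_LN(1, 3, ln0p1, ln0p2, ln1p1) == 0x16B7A0); /* LN1, PORT4 */

	printf("MG_DP_MODE(ln=1, tc_port=2) -> 0x%X\n",
	       (unsigned int)PHY_PORT_LN(1, 2, ln0p1, ln0p2, ln1p1));
	return 0;
}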
+#define CSTATE_RENDER_CLOCK_GATE_DISABLE (1 << 5) #define GEN2_IER _MMIO(0x20a0) #define GEN2_IIR _MMIO(0x20a4) #define GEN2_IMR _MMIO(0x20a8) @@ -2878,6 +2885,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_RC_SLEEP_PSMI_CONTROL _MMIO(0x2050) #define GEN6_PSMI_SLEEP_MSG_DISABLE (1 << 0) +#define GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) #define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12) #define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1 << 10) @@ -2956,6 +2964,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -3558,6 +3568,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _PALETTE_A 0xa000 #define _PALETTE_B 0xa800 #define _CHV_PALETTE_C 0xc000 +#define PALETTE_RED_MASK REG_GENMASK(23, 16) +#define PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define PALETTE(pipe, i) _MMIO(DISPLAY_MMIO_BASE(dev_priv) + \ _PICK((pipe), _PALETTE_A, \ _PALETTE_B, _CHV_PALETTE_C) + \ @@ -4038,10 +4051,15 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) #define MSCUNIT_CLKGATE_DIS (1 << 10) +#define L3_CLKGATE_DIS REG_BIT(16) +#define L3_CR2X_CLKGATE_DIS REG_BIT(17) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) +#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528) +#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) + #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) @@ -4135,6 +4153,7 @@ enum { #define _VTOTAL_A 0x6000c #define _VBLANK_A 0x60010 #define _VSYNC_A 0x60014 +#define _EXITLINE_A 0x60018 #define _PIPEASRC 0x6001c #define _BCLRPAT_A 0x60020 #define _VSYNCSHIFT_A 0x60028 @@ -4186,10 +4205,22 @@ enum { #define PIPESRC(trans) _MMIO_TRANS2(trans, _PIPEASRC) #define PIPE_MULT(trans) _MMIO_TRANS2(trans, _PIPE_MULT_A) -/* HSW+ eDP PSR registers */ -#define HSW_EDP_PSR_BASE 0x64800 -#define BDW_EDP_PSR_BASE 0x6f800 -#define EDP_PSR_CTL _MMIO(dev_priv->psr_mmio_base + 0) +#define EXITLINE(trans) _MMIO_TRANS2(trans, _EXITLINE_A) +#define EXITLINE_ENABLE REG_BIT(31) +#define EXITLINE_MASK REG_GENMASK(12, 0) +#define EXITLINE_SHIFT 0 + +/* + * HSW+ eDP PSR registers + * + * HSW PSR registers are relative to DDIA(_DDI_BUF_CTL_A + 0x800) with just one + * instance of it + */ +#define _HSW_EDP_PSR_BASE 0x64800 +#define _SRD_CTL_A 0x60800 +#define _SRD_CTL_EDP 0x6f800 +#define _PSR_ADJ(tran, reg) (_TRANS2(tran, reg) - dev_priv->hsw_psr_mmio_adjust) +#define EDP_PSR_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_CTL_A)) #define EDP_PSR_ENABLE (1 << 31) #define BDW_PSR_SINGLE_FRAME (1 << 30) #define EDP_PSR_RESTORE_PSR_ACTIVE_CTX_MASK (1 << 29) /* SW can't modify */ @@ -4215,27 +4246,40 @@ enum { #define EDP_PSR_TP1_TIME_0us (3 << 4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 -/* Bspec claims those aren't shifted but stay at 0x64800 */ +/* + * Until TGL, IMR/IIR are fixed at 0x648xx. On TGL+ those registers are relative + * to transcoder and bits defined for each one as if using no shift (i.e. 
as if + * it was for TRANSCODER_EDP) + */ #define EDP_PSR_IMR _MMIO(0x64834) #define EDP_PSR_IIR _MMIO(0x64838) -#define EDP_PSR_ERROR(shift) (1 << ((shift) + 2)) -#define EDP_PSR_POST_EXIT(shift) (1 << ((shift) + 1)) -#define EDP_PSR_PRE_ENTRY(shift) (1 << (shift)) -#define EDP_PSR_TRANSCODER_C_SHIFT 24 -#define EDP_PSR_TRANSCODER_B_SHIFT 16 -#define EDP_PSR_TRANSCODER_A_SHIFT 8 -#define EDP_PSR_TRANSCODER_EDP_SHIFT 0 - -#define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) +#define _PSR_IMR_A 0x60814 +#define _PSR_IIR_A 0x60818 +#define TRANS_PSR_IMR(tran) _MMIO_TRANS2(tran, _PSR_IMR_A) +#define TRANS_PSR_IIR(tran) _MMIO_TRANS2(tran, _PSR_IIR_A) +#define _EDP_PSR_TRANS_SHIFT(trans) ((trans) == TRANSCODER_EDP ? \ + 0 : ((trans) - TRANSCODER_A + 1) * 8) +#define EDP_PSR_TRANS_MASK(trans) (0x7 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_ERROR(trans) (0x4 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_POST_EXIT(trans) (0x2 << _EDP_PSR_TRANS_SHIFT(trans)) +#define EDP_PSR_PRE_ENTRY(trans) (0x1 << _EDP_PSR_TRANS_SHIFT(trans)) + +#define _SRD_AUX_CTL_A 0x60810 +#define _SRD_AUX_CTL_EDP 0x6f810 +#define EDP_PSR_AUX_CTL(tran) _MMIO(_PSR_ADJ(tran, _SRD_AUX_CTL_A)) #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) #define EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK (0xf << 16) #define EDP_PSR_AUX_CTL_ERROR_INTERRUPT (1 << 11) #define EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK (0x7ff) -#define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ +#define _SRD_AUX_DATA_A 0x60814 +#define _SRD_AUX_DATA_EDP 0x6f814 +#define EDP_PSR_AUX_DATA(tran, i) _MMIO(_PSR_ADJ(tran, _SRD_AUX_DATA_A) + (i) + 4) /* 5 registers */ -#define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) +#define _SRD_STATUS_A 0x60840 +#define _SRD_STATUS_EDP 0x6f840 +#define EDP_PSR_STATUS(tran) _MMIO(_PSR_ADJ(tran, _SRD_STATUS_A)) #define EDP_PSR_STATUS_STATE_MASK (7 << 29) #define EDP_PSR_STATUS_STATE_SHIFT 29 #define EDP_PSR_STATUS_STATE_IDLE (0 << 29) @@ -4260,10 +4304,15 @@ enum { #define EDP_PSR_STATUS_SENDING_TP1 (1 << 4) #define EDP_PSR_STATUS_IDLE_MASK 0xf -#define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) +#define _SRD_PERF_CNT_A 0x60844 +#define _SRD_PERF_CNT_EDP 0x6f844 +#define EDP_PSR_PERF_CNT(tran) _MMIO(_PSR_ADJ(tran, _SRD_PERF_CNT_A)) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */ +/* PSR_MASK on SKL+ */ +#define _SRD_DEBUG_A 0x60860 +#define _SRD_DEBUG_EDP 0x6f860 +#define EDP_PSR_DEBUG(tran) _MMIO(_PSR_ADJ(tran, _SRD_DEBUG_A)) #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1 << 28) #define EDP_PSR_DEBUG_MASK_LPSP (1 << 27) #define EDP_PSR_DEBUG_MASK_MEMUP (1 << 26) @@ -4271,7 +4320,9 @@ enum { #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1 << 16) /* Reserved in ICL+ */ #define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1 << 15) /* SKL+ */ -#define EDP_PSR2_CTL _MMIO(0x6f900) +#define _PSR2_CTL_A 0x60900 +#define _PSR2_CTL_EDP 0x6f900 +#define EDP_PSR2_CTL(tran) _MMIO_TRANS2(tran, _PSR2_CTL_A) #define EDP_PSR2_ENABLE (1 << 31) #define EDP_SU_TRACK_ENABLE (1 << 30) #define EDP_Y_COORDINATE_VALID (1 << 26) /* GLK and CNL+ */ @@ -4293,8 +4344,8 @@ enum { #define _PSR_EVENT_TRANS_B 0x61848 #define _PSR_EVENT_TRANS_C 0x62848 #define _PSR_EVENT_TRANS_D 0x63848 -#define _PSR_EVENT_TRANS_EDP 0x6F848 -#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define _PSR_EVENT_TRANS_EDP 0x6f848 +#define PSR_EVENT(tran) _MMIO_TRANS2(tran, _PSR_EVENT_TRANS_A) 
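Worked through with the offsets above, the _PSR_ADJ() scheme keeps every pre-TGL platform on its legacy PSR block while letting TGL+ index the registers per transcoder. A minimal standalone sketch (not driver code), assuming _TRANS2() adds the target transcoder's mmio offset relative to transcoder A (DISPLAY_MMIO_BASE is ignored here as it is 0 on the platforms concerned) and that hsw_psr_mmio_adjust is set to _SRD_CTL_EDP - _HSW_EDP_PSR_BASE on HSW and left at 0 everywhere else:

/* Sketch of how EDP_PSR_CTL(tran) resolves for a few representative cases. */
#include <assert.h>
#include <stdint.h>

enum transcoder { TRANSCODER_A, TRANSCODER_B, TRANSCODER_C, TRANSCODER_EDP };

static const uint32_t trans_offset[] = {
	[TRANSCODER_A]   = 0x60000,
	[TRANSCODER_B]   = 0x61000,
	[TRANSCODER_C]   = 0x62000,
	[TRANSCODER_EDP] = 0x6f000,	/* assumed TRANSCODER_EDP_OFFSET */
};

/* _TRANS2(): register offset rebased from transcoder A onto 'tran' */
static uint32_t trans2(enum transcoder tran, uint32_t reg)
{
	return reg + trans_offset[tran] - trans_offset[TRANSCODER_A];
}

static uint32_t edp_psr_ctl(enum transcoder tran, uint32_t hsw_psr_mmio_adjust)
{
	return trans2(tran, 0x60800 /* _SRD_CTL_A */) - hsw_psr_mmio_adjust;
}

int main(void)
{
	/* BDW..ICL: PSR lives only on the eDP transcoder, still the old 0x6f800 */
	assert(edp_psr_ctl(TRANSCODER_EDP, 0) == 0x6f800);
	/* HSW: same lookup pulled back onto the legacy 0x64800 block */
	assert(edp_psr_ctl(TRANSCODER_EDP, 0x6f800 - 0x64800) == 0x64800);
	/* TGL+: genuinely per-transcoder, e.g. transcoder A */
	assert(edp_psr_ctl(TRANSCODER_A, 0) == 0x60800);
	return 0;
}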
#define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) #define PSR_EVENT_PSR2_DISABLED (1 << 16) #define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) @@ -4312,15 +4363,16 @@ enum { #define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) #define PSR_EVENT_PSR_DISABLE (1 << 0) -#define EDP_PSR2_STATUS _MMIO(0x6f940) +#define _PSR2_STATUS_A 0x60940 +#define _PSR2_STATUS_EDP 0x6f940 +#define EDP_PSR2_STATUS(tran) _MMIO_TRANS2(tran, _PSR2_STATUS_A) #define EDP_PSR2_STATUS_STATE_MASK (0xf << 28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 -#define _PSR2_SU_STATUS_0 0x6F914 -#define _PSR2_SU_STATUS_1 0x6F918 -#define _PSR2_SU_STATUS_2 0x6F91C -#define _PSR2_SU_STATUS(index) _MMIO(_PICK_EVEN((index), _PSR2_SU_STATUS_0, _PSR2_SU_STATUS_1)) -#define PSR2_SU_STATUS(frame) (_PSR2_SU_STATUS((frame) / 3)) +#define _PSR2_SU_STATUS_A 0x60914 +#define _PSR2_SU_STATUS_EDP 0x6f914 +#define _PSR2_SU_STATUS(tran, index) _MMIO(_TRANS2(tran, _PSR2_SU_STATUS_A) + (index) * 4) +#define PSR2_SU_STATUS(tran, frame) (_PSR2_SU_STATUS(tran, (frame) / 3)) #define PSR2_SU_STATUS_SHIFT(frame) (((frame) % 3) * 10) #define PSR2_SU_STATUS_MASK(frame) (0x3ff << PSR2_SU_STATUS_SHIFT(frame)) #define PSR2_SU_STATUS_FRAMES 8 @@ -4646,6 +4698,7 @@ enum { * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte * of the infoframe structure specified by CEA-861. */ #define VIDEO_DIP_DATA_SIZE 32 +#define VIDEO_DIP_GMP_DATA_SIZE 36 #define VIDEO_DIP_VSC_DATA_SIZE 36 #define VIDEO_DIP_PPS_DATA_SIZE 132 #define VIDEO_DIP_CTL _MMIO(0x61170) @@ -5652,6 +5705,11 @@ enum { #define PIPECONF_CXSR_DOWNCLOCK (1 << 16) #define PIPECONF_EDP_RR_MODE_SWITCH_VLV (1 << 14) #define PIPECONF_COLOR_RANGE_SELECT (1 << 13) +#define PIPECONF_OUTPUT_COLORSPACE_MASK (3 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_RGB (0 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV601 (1 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV709 (2 << 11) /* ilk-ivb */ +#define PIPECONF_OUTPUT_COLORSPACE_YUV_HSW (1 << 11) /* hsw only */ #define PIPECONF_BPC_MASK (0x7 << 5) #define PIPECONF_8BPC (0 << 5) #define PIPECONF_10BPC (1 << 5) @@ -5739,12 +5797,13 @@ enum { #define _PIPEAGCMAX 0x70010 #define _PIPEBGCMAX 0x71010 +#define PIPEGCMAX_RGB_MASK REG_GENMASK(15, 0) #define PIPEGCMAX(pipe, i) _MMIO_PIPE2(pipe, _PIPEAGCMAX + (i) * 4) #define _PIPE_MISC_A 0x70030 #define _PIPE_MISC_B 0x71030 -#define PIPEMISC_YUV420_ENABLE (1 << 27) -#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) +#define PIPEMISC_YUV420_ENABLE (1 << 27) /* glk+ */ +#define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) /* glk+ */ #define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_DITHER_BPC_MASK (7 << 5) @@ -6201,6 +6260,7 @@ enum { #define CHV_CURSOR_C_OFFSET 0x700e0 #define IVB_CURSOR_B_OFFSET 0x71080 #define IVB_CURSOR_C_OFFSET 0x72080 +#define TGL_CURSOR_D_OFFSET 0x73080 /* Display A control */ #define _DSPACNTR 0x70180 @@ -7171,11 +7231,17 @@ enum { /* legacy palette */ #define _LGC_PALETTE_A 0x4a000 #define _LGC_PALETTE_B 0x4a800 +#define LGC_PALETTE_RED_MASK REG_GENMASK(23, 16) +#define LGC_PALETTE_GREEN_MASK REG_GENMASK(15, 8) +#define LGC_PALETTE_BLUE_MASK REG_GENMASK(7, 0) #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4) /* ilk/snb precision palette */ #define _PREC_PALETTE_A 0x4b000 #define _PREC_PALETTE_B 0x4c000 +#define PREC_PALETTE_RED_MASK REG_GENMASK(29, 20) +#define PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PALETTE_BLUE_MASK 
REG_GENMASK(9, 0) #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4) #define _PREC_PIPEAGCMAX 0x4d000 @@ -7211,6 +7277,8 @@ enum { #define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084) #define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088) +#define DMC_DEBUG3 _MMIO(0x101090) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) @@ -7609,7 +7677,10 @@ enum { #define GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE (1 << 10) #define GEN9_CS_DEBUG_MODE1 _MMIO(0x20ec) +#define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define GEN9_CTX_PREEMPT_REG _MMIO(0x2248) +#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11) + #define GEN8_CS_CHICKEN1 _MMIO(0x2580) #define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0) #define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1)) @@ -7634,6 +7705,7 @@ enum { #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC (1 << 11) + #define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE (1 << 9) #define HIZ_CHICKEN _MMIO(0x7018) # define CHV_HZ_8X8_MODE_IN_1X (1 << 15) @@ -7818,29 +7890,24 @@ enum { SDE_FDI_RXA_CPT) /* south display engine interrupt: ICP/TGP */ -#define SDE_TC6_HOTPLUG_TGP (1 << 29) -#define SDE_TC5_HOTPLUG_TGP (1 << 28) -#define SDE_TC4_HOTPLUG_ICP (1 << 27) -#define SDE_TC3_HOTPLUG_ICP (1 << 26) -#define SDE_TC2_HOTPLUG_ICP (1 << 25) -#define SDE_TC1_HOTPLUG_ICP (1 << 24) #define SDE_GMBUS_ICP (1 << 23) -#define SDE_DDIC_HOTPLUG_TGP (1 << 18) -#define SDE_DDIB_HOTPLUG_ICP (1 << 17) -#define SDE_DDIA_HOTPLUG_ICP (1 << 16) #define SDE_TC_HOTPLUG_ICP(tc_port) (1 << ((tc_port) + 24)) #define SDE_DDI_HOTPLUG_ICP(port) (1 << ((port) + 16)) -#define SDE_DDI_MASK_ICP (SDE_DDIB_HOTPLUG_ICP | \ - SDE_DDIA_HOTPLUG_ICP) -#define SDE_TC_MASK_ICP (SDE_TC4_HOTPLUG_ICP | \ - SDE_TC3_HOTPLUG_ICP | \ - SDE_TC2_HOTPLUG_ICP | \ - SDE_TC1_HOTPLUG_ICP) -#define SDE_DDI_MASK_TGP (SDE_DDIC_HOTPLUG_TGP | \ - SDE_DDI_MASK_ICP) -#define SDE_TC_MASK_TGP (SDE_TC6_HOTPLUG_TGP | \ - SDE_TC5_HOTPLUG_TGP | \ - SDE_TC_MASK_ICP) +#define SDE_DDI_MASK_ICP (SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_ICP (SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) +#define SDE_DDI_MASK_TGP (SDE_DDI_HOTPLUG_ICP(PORT_C) | \ + SDE_DDI_HOTPLUG_ICP(PORT_B) | \ + SDE_DDI_HOTPLUG_ICP(PORT_A)) +#define SDE_TC_MASK_TGP (SDE_TC_HOTPLUG_ICP(PORT_TC6) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC5) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC4) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC3) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC2) | \ + SDE_TC_HOTPLUG_ICP(PORT_TC1)) #define SDEISR _MMIO(0xc4000) #define SDEIMR _MMIO(0xc4004) @@ -7907,26 +7974,13 @@ enum { * SHOTPLUG_CTL_DDI and SHOTPLUG_CTL_TC. 
*/ -#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) -#define TGP_DDIC_HPD_ENABLE (1 << 11) -#define TGP_DDIC_HPD_STATUS_MASK (3 << 8) -#define TGP_DDIC_HPD_NO_DETECT (0 << 8) -#define TGP_DDIC_HPD_SHORT_DETECT (1 << 8) -#define TGP_DDIC_HPD_LONG_DETECT (2 << 8) -#define TGP_DDIC_HPD_SHORT_LONG_DETECT (3 << 8) -#define ICP_DDIB_HPD_ENABLE (1 << 7) -#define ICP_DDIB_HPD_STATUS_MASK (3 << 4) -#define ICP_DDIB_HPD_NO_DETECT (0 << 4) -#define ICP_DDIB_HPD_SHORT_DETECT (1 << 4) -#define ICP_DDIB_HPD_LONG_DETECT (2 << 4) -#define ICP_DDIB_HPD_SHORT_LONG_DETECT (3 << 4) -#define ICP_DDIA_HPD_ENABLE (1 << 3) -#define ICP_DDIA_HPD_OP_DRIVE_1 (1 << 2) -#define ICP_DDIA_HPD_STATUS_MASK (3 << 0) -#define ICP_DDIA_HPD_NO_DETECT (0 << 0) -#define ICP_DDIA_HPD_SHORT_DETECT (1 << 0) -#define ICP_DDIA_HPD_LONG_DETECT (2 << 0) -#define ICP_DDIA_HPD_SHORT_LONG_DETECT (3 << 0) +#define SHOTPLUG_CTL_DDI _MMIO(0xc4030) +#define SHOTPLUG_CTL_DDI_HPD_ENABLE(port) (0x8 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(port) (0x3 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(port) (0x0 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(port) (0x1 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_LONG_DETECT(port) (0x2 << (4 * (port))) +#define SHOTPLUG_CTL_DDI_HPD_SHORT_LONG_DETECT(port) (0x3 << (4 * (port))) #define SHOTPLUG_CTL_TC _MMIO(0xc4034) #define ICP_TC_HPD_ENABLE(tc_port) (8 << (tc_port) * 4) @@ -8037,14 +8091,15 @@ enum { #define ICP_TC_HPD_LONG_DETECT(tc_port) (2 << (tc_port) * 4) #define ICP_TC_HPD_SHORT_DETECT(tc_port) (1 << (tc_port) * 4) -#define ICP_DDI_HPD_ENABLE_MASK (ICP_DDIB_HPD_ENABLE | \ - ICP_DDIA_HPD_ENABLE) +#define ICP_DDI_HPD_ENABLE_MASK (SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) #define ICP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC4) | \ ICP_TC_HPD_ENABLE(PORT_TC3) | \ ICP_TC_HPD_ENABLE(PORT_TC2) | \ ICP_TC_HPD_ENABLE(PORT_TC1)) -#define TGP_DDI_HPD_ENABLE_MASK (TGP_DDIC_HPD_ENABLE | \ - ICP_DDI_HPD_ENABLE_MASK) +#define TGP_DDI_HPD_ENABLE_MASK (SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_C) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_B) | \ + SHOTPLUG_CTL_DDI_HPD_ENABLE(PORT_A)) #define TGP_TC_HPD_ENABLE_MASK (ICP_TC_HPD_ENABLE(PORT_TC6) | \ ICP_TC_HPD_ENABLE(PORT_TC5) | \ ICP_TC_HPD_ENABLE_MASK) @@ -8594,6 +8649,10 @@ enum { #define GEN9_PWRGT_MEDIA_STATUS_MASK (1 << 0) #define GEN9_PWRGT_RENDER_STATUS_MASK (1 << 1) +#define POWERGATE_ENABLE _MMIO(0xa210) +#define VDN_HCP_POWERGATE_ENABLE(n) BIT(((n) * 2) + 3) +#define VDN_MFX_POWERGATE_ENABLE(n) BIT(((n) * 2) + 4) + #define GTFIFODBG _MMIO(0x120000) #define GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV (0x1f << 20) #define GT_FIFO_FREE_ENTRIES_CHV (0x7f << 13) @@ -8831,6 +8890,7 @@ enum { #define GEN9_SAGV_DISABLE 0x0 #define GEN9_SAGV_IS_DISABLED 0x1 #define GEN9_SAGV_ENABLE 0x3 +#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 @@ -9094,6 +9154,10 @@ enum { #define HSW_AUD_CHICKENBIT _MMIO(0x65f10) #define SKL_AUD_CODEC_WAKE_SIGNAL (1 << 15) +#define AUD_FREQ_CNTRL _MMIO(0x65900) +#define AUD_PIN_BUF_CTL _MMIO(0x48414) +#define AUD_PIN_BUF_ENABLE REG_BIT(31) + /* * HSW - ICL power wells * @@ -9256,12 +9320,20 @@ enum skl_power_gate { /* HDCP Repeater Registers */ #define HDCP_REP_CTL _MMIO(0x66d00) +#define HDCP_TRANSA_REP_PRESENT BIT(31) +#define HDCP_TRANSB_REP_PRESENT BIT(30) +#define HDCP_TRANSC_REP_PRESENT BIT(29) +#define HDCP_TRANSD_REP_PRESENT BIT(28) #define 
HDCP_DDIB_REP_PRESENT BIT(30) #define HDCP_DDIA_REP_PRESENT BIT(29) #define HDCP_DDIC_REP_PRESENT BIT(28) #define HDCP_DDID_REP_PRESENT BIT(27) #define HDCP_DDIF_REP_PRESENT BIT(26) #define HDCP_DDIE_REP_PRESENT BIT(25) +#define HDCP_TRANSA_SHA1_M0 (1 << 20) +#define HDCP_TRANSB_SHA1_M0 (2 << 20) +#define HDCP_TRANSC_SHA1_M0 (3 << 20) +#define HDCP_TRANSD_SHA1_M0 (4 << 20) #define HDCP_DDIB_SHA1_M0 (1 << 20) #define HDCP_DDIA_SHA1_M0 (2 << 20) #define HDCP_DDIC_SHA1_M0 (3 << 20) @@ -9301,15 +9373,92 @@ enum skl_power_gate { _PORTE_HDCP_AUTHENC, \ _PORTF_HDCP_AUTHENC) + (x)) #define PORT_HDCP_CONF(port) _PORT_HDCP_AUTHENC(port, 0x0) +#define _TRANSA_HDCP_CONF 0x66400 +#define _TRANSB_HDCP_CONF 0x66500 +#define TRANS_HDCP_CONF(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_CONF, \ + _TRANSB_HDCP_CONF) +#define HDCP_CONF(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_CONF(trans) : \ + PORT_HDCP_CONF(port)) + #define HDCP_CONF_CAPTURE_AN BIT(0) #define HDCP_CONF_AUTH_AND_ENC (BIT(1) | BIT(0)) #define PORT_HDCP_ANINIT(port) _PORT_HDCP_AUTHENC(port, 0x4) +#define _TRANSA_HDCP_ANINIT 0x66404 +#define _TRANSB_HDCP_ANINIT 0x66504 +#define TRANS_HDCP_ANINIT(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_ANINIT, \ + _TRANSB_HDCP_ANINIT) +#define HDCP_ANINIT(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANINIT(trans) : \ + PORT_HDCP_ANINIT(port)) + #define PORT_HDCP_ANLO(port) _PORT_HDCP_AUTHENC(port, 0x8) +#define _TRANSA_HDCP_ANLO 0x66408 +#define _TRANSB_HDCP_ANLO 0x66508 +#define TRANS_HDCP_ANLO(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANLO, \ + _TRANSB_HDCP_ANLO) +#define HDCP_ANLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANLO(trans) : \ + PORT_HDCP_ANLO(port)) + #define PORT_HDCP_ANHI(port) _PORT_HDCP_AUTHENC(port, 0xC) +#define _TRANSA_HDCP_ANHI 0x6640C +#define _TRANSB_HDCP_ANHI 0x6650C +#define TRANS_HDCP_ANHI(trans) _MMIO_TRANS(trans, _TRANSA_HDCP_ANHI, \ + _TRANSB_HDCP_ANHI) +#define HDCP_ANHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_ANHI(trans) : \ + PORT_HDCP_ANHI(port)) + #define PORT_HDCP_BKSVLO(port) _PORT_HDCP_AUTHENC(port, 0x10) +#define _TRANSA_HDCP_BKSVLO 0x66410 +#define _TRANSB_HDCP_BKSVLO 0x66510 +#define TRANS_HDCP_BKSVLO(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVLO, \ + _TRANSB_HDCP_BKSVLO) +#define HDCP_BKSVLO(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_BKSVLO(trans) : \ + PORT_HDCP_BKSVLO(port)) + #define PORT_HDCP_BKSVHI(port) _PORT_HDCP_AUTHENC(port, 0x14) +#define _TRANSA_HDCP_BKSVHI 0x66414 +#define _TRANSB_HDCP_BKSVHI 0x66514 +#define TRANS_HDCP_BKSVHI(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_BKSVHI, \ + _TRANSB_HDCP_BKSVHI) +#define HDCP_BKSVHI(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_BKSVHI(trans) : \ + PORT_HDCP_BKSVHI(port)) + #define PORT_HDCP_RPRIME(port) _PORT_HDCP_AUTHENC(port, 0x18) +#define _TRANSA_HDCP_RPRIME 0x66418 +#define _TRANSB_HDCP_RPRIME 0x66518 +#define TRANS_HDCP_RPRIME(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_RPRIME, \ + _TRANSB_HDCP_RPRIME) +#define HDCP_RPRIME(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP_RPRIME(trans) : \ + PORT_HDCP_RPRIME(port)) + #define PORT_HDCP_STATUS(port) _PORT_HDCP_AUTHENC(port, 0x1C) +#define _TRANSA_HDCP_STATUS 0x6641C +#define _TRANSB_HDCP_STATUS 0x6651C +#define TRANS_HDCP_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP_STATUS, \ + _TRANSB_HDCP_STATUS) +#define HDCP_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? 
\ + TRANS_HDCP_STATUS(trans) : \ + PORT_HDCP_STATUS(port)) + #define HDCP_STATUS_STREAM_A_ENC BIT(31) #define HDCP_STATUS_STREAM_B_ENC BIT(30) #define HDCP_STATUS_STREAM_C_ENC BIT(29) @@ -9336,23 +9485,44 @@ enum skl_power_gate { _PORTD_HDCP2_BASE, \ _PORTE_HDCP2_BASE, \ _PORTF_HDCP2_BASE) + (x)) - -#define HDCP2_AUTH_DDI(port) _PORT_HDCP2_BASE(port, 0x98) +#define PORT_HDCP2_AUTH(port) _PORT_HDCP2_BASE(port, 0x98) +#define _TRANSA_HDCP2_AUTH 0x66498 +#define _TRANSB_HDCP2_AUTH 0x66598 +#define TRANS_HDCP2_AUTH(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_AUTH, \ + _TRANSB_HDCP2_AUTH) #define AUTH_LINK_AUTHENTICATED BIT(31) #define AUTH_LINK_TYPE BIT(30) #define AUTH_FORCE_CLR_INPUTCTR BIT(19) #define AUTH_CLR_KEYS BIT(18) - -#define HDCP2_CTL_DDI(port) _PORT_HDCP2_BASE(port, 0xB0) +#define HDCP2_AUTH(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_AUTH(trans) : \ + PORT_HDCP2_AUTH(port)) + +#define PORT_HDCP2_CTL(port) _PORT_HDCP2_BASE(port, 0xB0) +#define _TRANSA_HDCP2_CTL 0x664B0 +#define _TRANSB_HDCP2_CTL 0x665B0 +#define TRANS_HDCP2_CTL(trans) _MMIO_TRANS(trans, _TRANSA_HDCP2_CTL, \ + _TRANSB_HDCP2_CTL) #define CTL_LINK_ENCRYPTION_REQ BIT(31) - -#define HDCP2_STATUS_DDI(port) _PORT_HDCP2_BASE(port, 0xB4) -#define STREAM_ENCRYPTION_STATUS_A BIT(31) -#define STREAM_ENCRYPTION_STATUS_B BIT(30) -#define STREAM_ENCRYPTION_STATUS_C BIT(29) +#define HDCP2_CTL(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? \ + TRANS_HDCP2_CTL(trans) : \ + PORT_HDCP2_CTL(port)) + +#define PORT_HDCP2_STATUS(port) _PORT_HDCP2_BASE(port, 0xB4) +#define _TRANSA_HDCP2_STATUS 0x664B4 +#define _TRANSB_HDCP2_STATUS 0x665B4 +#define TRANS_HDCP2_STATUS(trans) _MMIO_TRANS(trans, \ + _TRANSA_HDCP2_STATUS, \ + _TRANSB_HDCP2_STATUS) #define LINK_TYPE_STATUS BIT(22) #define LINK_AUTH_STATUS BIT(21) #define LINK_ENCRYPTION_STATUS BIT(20) +#define HDCP2_STATUS(dev_priv, trans, port) \ + (INTEL_GEN(dev_priv) >= 12 ? 
\ + TRANS_HDCP2_STATUS(trans) : \ + PORT_HDCP2_STATUS(port)) /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 @@ -9419,7 +9589,9 @@ enum skl_power_gate { /* DisplayPort Transport Control */ #define _DP_TP_CTL_A 0x64040 #define _DP_TP_CTL_B 0x64140 +#define _TGL_DP_TP_CTL_A 0x60540 #define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B) +#define TGL_DP_TP_CTL(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_CTL_A) #define DP_TP_CTL_ENABLE (1 << 31) #define DP_TP_CTL_FEC_ENABLE (1 << 30) #define DP_TP_CTL_MODE_SST (0 << 27) @@ -9439,7 +9611,9 @@ enum skl_power_gate { /* DisplayPort Transport Status */ #define _DP_TP_STATUS_A 0x64044 #define _DP_TP_STATUS_B 0x64144 +#define _TGL_DP_TP_STATUS_A 0x60544 #define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B) +#define TGL_DP_TP_STATUS(tran) _MMIO_TRANS2((tran), _TGL_DP_TP_STATUS_A) #define DP_TP_STATUS_FEC_ENABLE_LIVE (1 << 28) #define DP_TP_STATUS_IDLE_DONE (1 << 25) #define DP_TP_STATUS_ACT_SENT (1 << 24) @@ -9594,17 +9768,7 @@ enum skl_power_gate { #define _TRANSC_MSA_MISC 0x62410 #define _TRANS_EDP_MSA_MISC 0x6f410 #define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC) - -#define TRANS_MSA_SYNC_CLK (1 << 0) -#define TRANS_MSA_SAMPLING_444 (2 << 1) -#define TRANS_MSA_CLRSP_YCBCR (2 << 3) -#define TRANS_MSA_6_BPC (0 << 5) -#define TRANS_MSA_8_BPC (1 << 5) -#define TRANS_MSA_10_BPC (2 << 5) -#define TRANS_MSA_12_BPC (3 << 5) -#define TRANS_MSA_16_BPC (4 << 5) -#define TRANS_MSA_CEA_RANGE (1 << 3) -#define TRANS_MSA_USE_VSC_SDP (1 << 14) +/* See DP_MSA_MISC_* for the bit definitions */ /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) @@ -9645,7 +9809,10 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe) << 20) #define CDCLK_DIVMUX_CD_OVERRIDE (1 << 19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) +#define ICL_CDCLK_CD2X_PIPE(pipe) (_PICK(pipe, 0, 2, 6) << 19) #define ICL_CDCLK_CD2X_PIPE_NONE (7 << 19) +#define TGL_CDCLK_CD2X_PIPE(pipe) BXT_CDCLK_CD2X_PIPE(pipe) +#define TGL_CDCLK_CD2X_PIPE_NONE ICL_CDCLK_CD2X_PIPE_NONE #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1 << 16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) @@ -9966,6 +10133,160 @@ enum skl_power_gate { _TGL_DPLL1_CFGCR1, \ _TGL_TBTPLL_CFGCR1) +#define _DKL_PHY1_BASE 0x168000 +#define _DKL_PHY2_BASE 0x169000 +#define _DKL_PHY3_BASE 0x16A000 +#define _DKL_PHY4_BASE 0x16B000 +#define _DKL_PHY5_BASE 0x16C000 +#define _DKL_PHY6_BASE 0x16D000 + +/* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */ +#define _DKL_PLL_DIV0 0x200 +#define DKL_PLL_DIV0_INTEG_COEFF(x) ((x) << 16) +#define DKL_PLL_DIV0_INTEG_COEFF_MASK (0x1F << 16) +#define DKL_PLL_DIV0_PROP_COEFF(x) ((x) << 12) +#define DKL_PLL_DIV0_PROP_COEFF_MASK (0xF << 12) +#define DKL_PLL_DIV0_FBPREDIV_SHIFT (8) +#define DKL_PLL_DIV0_FBPREDIV(x) ((x) << DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBPREDIV_MASK (0xF << DKL_PLL_DIV0_FBPREDIV_SHIFT) +#define DKL_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define DKL_PLL_DIV0_FBDIV_INT_MASK (0xFF << 0) +#define DKL_PLL_DIV0(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV0) + +#define _DKL_PLL_DIV1 0x204 +#define DKL_PLL_DIV1_IREF_TRIM(x) ((x) << 16) +#define DKL_PLL_DIV1_IREF_TRIM_MASK (0x1F << 16) +#define DKL_PLL_DIV1_TDC_TARGET_CNT(x) ((x) << 0) +#define DKL_PLL_DIV1_TDC_TARGET_CNT_MASK (0xFF << 0) +#define DKL_PLL_DIV1(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_DIV1) + +#define _DKL_PLL_SSC 0x210 +#define 
DKL_PLL_SSC_IREF_NDIV_RATIO(x) ((x) << 29) +#define DKL_PLL_SSC_IREF_NDIV_RATIO_MASK (0x7 << 29) +#define DKL_PLL_SSC_STEP_LEN(x) ((x) << 16) +#define DKL_PLL_SSC_STEP_LEN_MASK (0xFF << 16) +#define DKL_PLL_SSC_STEP_NUM(x) ((x) << 11) +#define DKL_PLL_SSC_STEP_NUM_MASK (0x7 << 11) +#define DKL_PLL_SSC_EN (1 << 9) +#define DKL_PLL_SSC(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_SSC) + +#define _DKL_PLL_BIAS 0x214 +#define DKL_PLL_BIAS_FRAC_EN_H (1 << 30) +#define DKL_PLL_BIAS_FBDIV_SHIFT (8) +#define DKL_PLL_BIAS_FBDIV_FRAC(x) ((x) << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS_FBDIV_FRAC_MASK (0x3FFFFF << DKL_PLL_BIAS_FBDIV_SHIFT) +#define DKL_PLL_BIAS(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_BIAS) + +#define _DKL_PLL_TDC_COLDST_BIAS 0x218 +#define DKL_PLL_TDC_SSC_STEP_SIZE(x) ((x) << 8) +#define DKL_PLL_TDC_SSC_STEP_SIZE_MASK (0xFF << 8) +#define DKL_PLL_TDC_FEED_FWD_GAIN(x) ((x) << 0) +#define DKL_PLL_TDC_FEED_FWD_GAIN_MASK (0xFF << 0) +#define DKL_PLL_TDC_COLDST_BIAS(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_PLL_TDC_COLDST_BIAS) + +#define _DKL_REFCLKIN_CTL 0x12C +/* Bits are the same as MG_REFCLKIN_CTL */ +#define DKL_REFCLKIN_CTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_REFCLKIN_CTL) + +#define _DKL_CLKTOP2_HSCLKCTL 0xD4 +/* Bits are the same as MG_CLKTOP2_HSCLKCTL */ +#define DKL_CLKTOP2_HSCLKCTL(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_HSCLKCTL) + +#define _DKL_CLKTOP2_CORECLKCTL1 0xD8 +/* Bits are the same as MG_CLKTOP2_CORECLKCTL1 */ +#define DKL_CLKTOP2_CORECLKCTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CLKTOP2_CORECLKCTL1) + +#define _DKL_TX_DPCNTL0 0x2C0 +#define DKL_TX_PRESHOOT_COEFF(x) ((x) << 13) +#define DKL_TX_PRESHOOT_COEFF_MASK (0x1f << 13) +#define DKL_TX_DE_EMPHASIS_COEFF(x) ((x) << 8) +#define DKL_TX_DE_EMPAHSIS_COEFF_MASK (0x1f << 8) +#define DKL_TX_VSWING_CONTROL(x) ((x) << 0) +#define DKL_TX_VSWING_CONTROL_MASK (0x7 << 0) +#define DKL_TX_DPCNTL0(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL0) + +#define _DKL_TX_DPCNTL1 0x2C4 +/* Bits are the same as DKL_TX_DPCNTRL0 */ +#define DKL_TX_DPCNTL1(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL1) + +#define _DKL_TX_DPCNTL2 0x2C8 +#define DKL_TX_DP20BITMODE (1 << 2) +#define DKL_TX_DPCNTL2(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DPCNTL2) + +#define _DKL_TX_FW_CALIB 0x2F8 +#define DKL_TX_CFG_DISABLE_WAIT_INIT (1 << 7) +#define DKL_TX_FW_CALIB(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_FW_CALIB) + +#define _DKL_TX_DW17 0xDC4 +#define DKL_TX_DW17(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW17) + +#define _DKL_TX_DW18 0xDC8 +#define DKL_TX_DW18(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_TX_DW18) + +#define _DKL_DP_MODE 0xA0 +#define DKL_DP_MODE(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_DP_MODE) + +#define _DKL_CMN_UC_DW27 0x36C +#define DKL_CMN_UC_DW27_UC_HEALTH (0x1 << 15) +#define DKL_CMN_UC_DW_27(tc_port) _MMIO(_PORT(tc_port, \ + _DKL_PHY1_BASE, \ + _DKL_PHY2_BASE) + \ + _DKL_CMN_UC_DW27) + +/* + * Each Dekel PHY is addressed through a 4KB aperture. 
Each PHY has more than + * 4KB of register space, so a separate index is programmed in HIP_INDEX_REG0 + * or HIP_INDEX_REG1, based on the port number, to set the upper 2 address + * bits that point the 4KB window into the full PHY register space. + */ +#define _HIP_INDEX_REG0 0x1010A0 +#define _HIP_INDEX_REG1 0x1010A4 +#define HIP_INDEX_REG(tc_port) _MMIO((tc_port) < 4 ? _HIP_INDEX_REG0 \ + : _HIP_INDEX_REG1) +#define _HIP_INDEX_SHIFT(tc_port) (8 * ((tc_port) % 4)) +#define HIP_INDEX_VAL(tc_port, val) ((val) << _HIP_INDEX_SHIFT(tc_port)) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9980,6 +10301,8 @@ enum skl_power_gate { /* GEN9 DC */ #define DC_STATE_EN _MMIO(0x45504) #define DC_STATE_DISABLE 0 +#define DC_STATE_EN_DC3CO REG_BIT(30) +#define DC_STATE_DC3CO_STATUS REG_BIT(29) #define DC_STATE_EN_UPTO_DC5 (1 << 0) #define DC_STATE_EN_DC9 (1 << 3) #define DC_STATE_EN_UPTO_DC6 (2 << 0) @@ -10108,11 +10431,11 @@ enum skl_power_gate { #define _PIPE_A_CSC_COEFF_BV 0x49024 #define _PIPE_A_CSC_MODE 0x49028 -#define ICL_CSC_ENABLE (1 << 31) -#define ICL_OUTPUT_CSC_ENABLE (1 << 30) -#define CSC_BLACK_SCREEN_OFFSET (1 << 2) -#define CSC_POSITION_BEFORE_GAMMA (1 << 1) -#define CSC_MODE_YUV_TO_RGB (1 << 0) +#define ICL_CSC_ENABLE (1 << 31) /* icl+ */ +#define ICL_OUTPUT_CSC_ENABLE (1 << 30) /* icl+ */ +#define CSC_BLACK_SCREEN_OFFSET (1 << 2) /* ilk/snb */ +#define CSC_POSITION_BEFORE_GAMMA (1 << 1) /* pre-glk */ +#define CSC_MODE_YUV_TO_RGB (1 << 0) /* ilk/snb */ #define _PIPE_A_CSC_PREOFF_HI 0x49030 #define _PIPE_A_CSC_PREOFF_ME 0x49034 @@ -10228,6 +10551,9 @@ enum skl_power_gate { #define _PAL_PREC_GC_MAX_A 0x4A410 #define _PAL_PREC_GC_MAX_B 0x4AC10 #define _PAL_PREC_GC_MAX_C 0x4B410 +#define PREC_PAL_DATA_RED_MASK REG_GENMASK(29, 20) +#define PREC_PAL_DATA_GREEN_MASK REG_GENMASK(19, 10) +#define PREC_PAL_DATA_BLUE_MASK REG_GENMASK(9, 0) #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 @@ -10280,6 +10606,9 @@ enum skl_power_gate { #define CGM_PIPE_MODE_GAMMA (1 << 2) #define CGM_PIPE_MODE_CSC (1 << 1) #define CGM_PIPE_MODE_DEGAMMA (1 << 0) +#define CGM_PIPE_GAMMA_RED_MASK REG_GENMASK(9, 0) +#define CGM_PIPE_GAMMA_GREEN_MASK REG_GENMASK(25, 16) +#define CGM_PIPE_GAMMA_BLUE_MASK REG_GENMASK(9, 0) #define _CGM_PIPE_B_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x69900) #define _CGM_PIPE_B_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x69904) @@ -11527,16 +11856,31 @@ enum skl_power_gate { #define PORT_TX_DFLEXDPSP(fia) _MMIO_FIA((fia), 0x008A0) #define MODULAR_FIA_MASK (1 << 4) -#define TC_LIVE_STATE_TBT(tc_port) (1 << ((tc_port) * 8 + 6)) -#define TC_LIVE_STATE_TC(tc_port) (1 << ((tc_port) * 8 + 5)) -#define DP_LANE_ASSIGNMENT_SHIFT(tc_port) ((tc_port) * 8) -#define DP_LANE_ASSIGNMENT_MASK(tc_port) (0xf << ((tc_port) * 8)) -#define DP_LANE_ASSIGNMENT(tc_port, x) ((x) << ((tc_port) * 8)) +#define TC_LIVE_STATE_TBT(idx) (1 << ((idx) * 8 + 6)) +#define TC_LIVE_STATE_TC(idx) (1 << ((idx) * 8 + 5)) +#define DP_LANE_ASSIGNMENT_SHIFT(idx) ((idx) * 8) +#define DP_LANE_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 8)) +#define DP_LANE_ASSIGNMENT(idx, x) ((x) << ((idx) * 8)) #define PORT_TX_DFLEXDPPMS(fia) _MMIO_FIA((fia), 0x00890) -#define DP_PHY_MODE_STATUS_COMPLETED(tc_port) (1 << (tc_port)) +#define DP_PHY_MODE_STATUS_COMPLETED(idx) (1 << (idx)) #define PORT_TX_DFLEXDPCSSS(fia) _MMIO_FIA((fia), 0x00894) -#define DP_PHY_MODE_STATUS_NOT_SAFE(tc_port) (1 << (tc_port)) +#define 
DP_PHY_MODE_STATUS_NOT_SAFE(idx) (1 << (idx)) + +#define PORT_TX_DFLEXPA1(fia) _MMIO_FIA((fia), 0x00880) +#define DP_PIN_ASSIGNMENT_SHIFT(idx) ((idx) * 4) +#define DP_PIN_ASSIGNMENT_MASK(idx) (0xf << ((idx) * 4)) +#define DP_PIN_ASSIGNMENT(idx, x) ((x) << ((idx) * 4)) + +/* This register controls the Display State Buffer (DSB) engines. */ +#define _DSBSL_INSTANCE_BASE 0x70B00 +#define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \ + (pipe) * 0x1000 + (id) * 100) +#define DSB_HEAD(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x0) +#define DSB_TAIL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x4) +#define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8) +#define DSB_ENABLE (1 << 31) +#define DSB_STATUS (1 << 0) #endif /* _I915_REG_H_ */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a53777dd371c..4575f368455d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -169,16 +169,17 @@ remove_from_client(struct i915_request *request) { struct drm_i915_file_private *file_priv; - file_priv = READ_ONCE(request->file_priv); - if (!file_priv) + if (!READ_ONCE(request->file_priv)) return; - spin_lock(&file_priv->mm.lock); - if (request->file_priv) { + rcu_read_lock(); + file_priv = xchg(&request->file_priv, NULL); + if (file_priv) { + spin_lock(&file_priv->mm.lock); list_del(&request->client_link); - request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); } - spin_unlock(&file_priv->mm.lock); + rcu_read_unlock(); } static void free_capture_list(struct i915_request *request) @@ -194,11 +195,29 @@ static void free_capture_list(struct i915_request *request) } } -static bool i915_request_retire(struct i915_request *rq) +static void remove_from_engine(struct i915_request *rq) { - struct i915_active_request *active, *next; + struct intel_engine_cs *engine, *locked; - lockdep_assert_held(&rq->timeline->mutex); + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + locked = READ_ONCE(rq->engine); + spin_lock_irq(&locked->active.lock); + while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { + spin_unlock(&locked->active.lock); + spin_lock(&engine->active.lock); + locked = engine; + } + list_del(&rq->sched.link); + spin_unlock_irq(&locked->active.lock); +} + +bool i915_request_retire(struct i915_request *rq) +{ if (!i915_request_completed(rq)) return false; @@ -219,51 +238,19 @@ static bool i915_request_retire(struct i915_request *rq) * Note this requires that we are always called in request * completion order. */ - GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests)); + GEM_BUG_ON(!list_is_first(&rq->link, + &i915_request_timeline(rq)->requests)); rq->ring->head = rq->postfix; /* - * Walk through the active list, calling retire on each. This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). - * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). 
- */ - list_for_each_entry_safe(active, next, &rq->active_list, link) { - /* - * In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of - * i915_active_request. So we try to keep this loop as - * streamlined as possible and also prefetch the next - * i915_active_request to try and hide the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, rq); - } - - local_irq_disable(); - - /* * We only loosely track inflight requests across preemption, * and so we may find ourselves attempting to retire a _completed_ * request that we have removed from the HW and put back on a run * queue. */ - spin_lock(&rq->engine->active.lock); - list_del(&rq->sched.link); - spin_unlock(&rq->engine->active.lock); + remove_from_engine(rq); - spin_lock(&rq->lock); + spin_lock_irq(&rq->lock); i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) dma_fence_signal_locked(&rq->fence); @@ -278,9 +265,7 @@ static bool i915_request_retire(struct i915_request *rq) __notify_execute_cb(rq); } GEM_BUG_ON(!list_empty(&rq->execute_cb)); - spin_unlock(&rq->lock); - - local_irq_enable(); + spin_unlock_irq(&rq->lock); remove_from_client(rq); list_del(&rq->link); @@ -297,7 +282,7 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_timeline * const tl = rq->timeline; + struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *tmp; GEM_TRACE("%s fence %llx:%lld, current %d\n", @@ -305,7 +290,6 @@ void i915_request_retire_upto(struct i915_request *rq) rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); - lockdep_assert_held(&tl->mutex); GEM_BUG_ON(!i915_request_completed(rq)); do { @@ -358,9 +342,10 @@ __i915_request_await_execution(struct i915_request *rq, return 0; } -void __i915_request_submit(struct i915_request *request) +bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; + bool result = false; GEM_TRACE("%s fence %llx:%lld, current %d\n", engine->name, @@ -370,6 +355,25 @@ void __i915_request_submit(struct i915_request *request) GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); + /* + * With the advent of preempt-to-busy, we frequently encounter + * requests that we have unsubmitted from HW, but left running + * until the next ack and so have completed in the meantime. On + * resubmission of that completed request, we can skip + * updating the payload, and execlists can even skip submitting + * the request. + * + * We must remove the request from the caller's priority queue, + * and the caller must only call us when the request is in their + * priority queue, under the active.lock. This ensures that the + * request has *not* yet been retired and we can safely move + * the request into the engine->active.list where it will be + * dropped upon retiring. (Otherwise if resubmit a *retired* + * request, this would be a horrible use-after-free.) 
+ */ + if (i915_request_completed(request)) + goto xfer; + if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); @@ -393,13 +397,18 @@ void __i915_request_submit(struct i915_request *request) i915_sw_fence_signaled(&request->semaphore)) engine->saturated |= request->sched.semaphores; - /* We may be recursing from the signal callback of another i915 fence */ - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + engine->emit_fini_breadcrumb(request, + request->ring->vaddr + request->postfix); - list_move_tail(&request->sched.link, &engine->active.requests); + trace_i915_request_execute(request); + engine->serial++; + result = true; - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); - set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); +xfer: /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + + if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) + list_move_tail(&request->sched.link, &engine->active.requests); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && @@ -410,12 +419,7 @@ void __i915_request_submit(struct i915_request *request) spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request, - request->ring->vaddr + request->postfix); - - engine->serial++; - - trace_i915_request_execute(request); + return result; } void i915_request_submit(struct i915_request *request) @@ -641,9 +645,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->gem_context = ce->gem_context; rq->engine = ce->engine; rq->ring = ce->ring; - rq->timeline = tl; + rq->execution_mask = ce->engine->mask; + + rcu_assign_pointer(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; + rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ spin_lock_init(&rq->lock); @@ -661,9 +668,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->batch = NULL; rq->capture_list = NULL; rq->flags = 0; - rq->execution_mask = ALL_ENGINES; - INIT_LIST_HEAD(&rq->active_list); INIT_LIST_HEAD(&rq->execute_cb); /* @@ -702,7 +707,6 @@ err_unwind: ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&rq->active_list)); GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); @@ -747,16 +751,43 @@ err_unlock: static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->link, &signal->timeline->requests)) - return 0; + struct intel_timeline *tl; + struct dma_fence *fence; + int err; + + GEM_BUG_ON(i915_request_timeline(rq) == + rcu_access_pointer(signal->timeline)); - signal = list_prev_entry(signal, link); - if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) + rcu_read_lock(); + tl = rcu_dereference(signal->timeline); + if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref)) + tl = NULL; + rcu_read_unlock(); + if (!tl) /* already started or maybe even completed */ return 0; - return i915_sw_fence_await_dma_fence(&rq->submit, - &signal->fence, 0, - I915_FENCE_GFP); + fence = ERR_PTR(-EBUSY); + if (mutex_trylock(&tl->mutex)) { + fence = NULL; + if (!i915_request_started(signal) && + !list_is_first(&signal->link, &tl->requests)) { + signal = list_prev_entry(signal, link); + fence = dma_fence_get(&signal->fence); + } + 
mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + if (IS_ERR_OR_NULL(fence)) + return PTR_ERR_OR_ZERO(fence); + + err = 0; + if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + err = i915_sw_fence_await_dma_fence(&rq->submit, + fence, 0, + I915_FENCE_GFP); + dma_fence_put(fence); + + return err; } static intel_engine_mask_t @@ -782,34 +813,33 @@ emit_semaphore_wait(struct i915_request *to, struct i915_request *from, gfp_t gfp) { + const int has_token = INTEL_GEN(to->i915) >= 12; u32 hwsp_offset; + int len; u32 *cs; - int err; - GEM_BUG_ON(!from->timeline->has_initial_breadcrumb); GEM_BUG_ON(INTEL_GEN(to->i915) < 8); /* Just emit the first semaphore we see as request space is limited. */ if (already_busywaiting(to) & from->engine->mask) - return i915_sw_fence_await_dma_fence(&to->submit, - &from->fence, 0, - I915_FENCE_GFP); + goto await_fence; - err = i915_request_await_start(to, from); - if (err < 0) - return err; + if (i915_request_await_start(to, from) < 0) + goto await_fence; /* Only submit our spinner after the signaler is running! */ - err = __i915_request_await_execution(to, from, NULL, gfp); - if (err) - return err; + if (__i915_request_await_execution(to, from, NULL, gfp)) + goto await_fence; /* We need to pin the signaler's HWSP until we are finished reading. */ - err = intel_timeline_read_hwsp(from, to, &hwsp_offset); - if (err) - return err; + if (intel_timeline_read_hwsp(from, to, &hwsp_offset)) + goto await_fence; - cs = intel_ring_begin(to, 4); + len = 4; + if (has_token) + len += 2; + + cs = intel_ring_begin(to, len); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -821,18 +851,28 @@ emit_semaphore_wait(struct i915_request *to, * (post-wrap) values than they were expecting (and so wait * forever). */ - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = (MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD) + + has_token; *cs++ = from->fence.seqno; *cs++ = hwsp_offset; *cs++ = 0; + if (has_token) { + *cs++ = 0; + *cs++ = MI_NOOP; + } intel_ring_advance(to, cs); to->sched.semaphores |= from->engine->mask; to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; return 0; + +await_fence: + return i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + I915_FENCE_GFP); } static int @@ -916,21 +956,23 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context && - intel_timeline_sync_is_later(rq->timeline, fence)) + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) continue; if (dma_fence_is_i915(fence)) ret = i915_request_await_request(rq, to_request(fence)); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, + fence->context ? 
I915_FENCE_TIMEOUT : 0, I915_FENCE_GFP); if (ret < 0) return ret; /* Record the latest fence used against each timeline */ if (fence->context) - intel_timeline_sync_set(rq->timeline, fence); + intel_timeline_sync_set(i915_request_timeline(rq), + fence); } while (--nchild); return 0; @@ -1072,7 +1114,7 @@ void i915_request_skip(struct i915_request *rq, int error) static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct intel_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = i915_request_timeline(rq); struct i915_request *prev; /* @@ -1095,8 +1137,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = rcu_dereference_protected(timeline->last_request.request, - lockdep_is_held(&timeline->mutex)); + prev = to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1121,7 +1163,6 @@ __i915_request_add_to_timeline(struct i915_request *rq) * us, the timeline will hold its seqno which is later than ours. */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); - __i915_active_request_set(&timeline->last_request, rq); return prev; } @@ -1185,7 +1226,7 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { struct i915_sched_attr attr = rq->gem_context->sched; - struct intel_timeline * const tl = rq->timeline; + struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *prev; lockdep_assert_held(&tl->mutex); @@ -1240,7 +1281,9 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) 
*/ - if (prev && i915_request_completed(prev) && prev->timeline == tl) + if (prev && + i915_request_completed(prev) && + rcu_access_pointer(prev->timeline) == tl) i915_request_retire_upto(prev); mutex_unlock(&tl->mutex); @@ -1449,6 +1492,7 @@ long i915_request_wait(struct i915_request *rq, break; } + intel_engine_flush_submission(rq->engine); timeout = io_schedule_timeout(timeout); } __set_current_state(TASK_RUNNING); @@ -1461,48 +1505,6 @@ out: return timeout; } -bool i915_retire_requests(struct drm_i915_private *i915) -{ - struct intel_gt_timelines *timelines = &i915->gt.timelines; - struct intel_timeline *tl, *tn; - unsigned long flags; - LIST_HEAD(free); - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { - if (!mutex_trylock(&tl->mutex)) - continue; - - intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ - spin_unlock_irqrestore(&timelines->lock, flags); - - retire_requests(tl); - - spin_lock_irqsave(&timelines->lock, flags); - - /* Resume iteration after dropping lock */ - list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) - list_del(&tl->link); - - mutex_unlock(&tl->mutex); - - /* Defer the final release to after the spinlock */ - if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); - list_add(&tl->link, &free); - } - } - spin_unlock_irqrestore(&timelines->lock, flags); - - list_for_each_entry_safe(tl, tn, &free, link) - __intel_timeline_free(&tl->kref); - - return !list_empty(&timelines->active_list); -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 8ac6e1226a56..96991d64759c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -113,7 +113,7 @@ struct i915_request { struct intel_engine_cs *engine; struct intel_context *hw_context; struct intel_ring *ring; - struct intel_timeline *timeline; + struct intel_timeline __rcu *timeline; struct list_head signal_link; /* @@ -211,14 +211,14 @@ struct i915_request { * on the active_list (of their final request). */ struct i915_capture_list *capture_list; - struct list_head active_list; /** Time at which this request was emitted, in jiffies. 
*/ unsigned long emitted_jiffies; unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_WAITBOOST BIT(0) +#define I915_REQUEST_NOPREEMPT BIT(1) +#define I915_REQUEST_SENTINEL BIT(2) /** timeline->request entry for this request */ struct list_head link; @@ -251,6 +251,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr); +bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); static inline struct i915_request * @@ -292,7 +293,7 @@ int i915_request_await_execution(struct i915_request *rq, void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request); +bool __i915_request_submit(struct i915_request *request); void i915_request_submit(struct i915_request *request); void i915_request_skip(struct i915_request *request, int error); @@ -309,10 +310,8 @@ long i915_request_wait(struct i915_request *rq, long timeout) __attribute__((nonnull(1))); #define I915_WAIT_INTERRUPTIBLE BIT(0) -#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ -#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */ -#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(4) +#define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ static inline bool i915_request_signaled(const struct i915_request *rq) { @@ -442,6 +441,29 @@ static inline bool i915_request_has_nopreempt(const struct i915_request *rq) return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); } -bool i915_retire_requests(struct drm_i915_private *i915); +static inline bool i915_request_has_sentinel(const struct i915_request *rq) +{ + return unlikely(rq->flags & I915_REQUEST_SENTINEL); +} + +static inline struct intel_timeline * +i915_request_timeline(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); +} + +static inline struct intel_timeline * +i915_request_active_timeline(struct i915_request *rq) +{ + /* + * When in use during submission, we are protected by a guarantee that + * the context/timeline is pinned and must remain pinned until after + * this submission. 
+ */ + return rcu_dereference_protected(rq->timeline, + lockdep_is_held(&rq->engine->active.lock)); +} #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 6617963df9ed..b7b59328cb76 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -67,15 +67,15 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) } /** - * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table - * @__dmap: DMA address (output) + * __for_each_sgt_daddr - iterate over the device addresses of the given sg_table + * @__dp: Device address (output) * @__iter: 'struct sgt_iter' (iterator state, internal) * @__sgt: sg_table to iterate over (input) * @__step: step size */ -#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step) \ +#define __for_each_sgt_daddr(__dp, __iter, __sgt, __step) \ for ((__iter) = __sgt_iter((__sgt)->sgl, true); \ - ((__dmap) = (__iter).dma + (__iter).curr); \ + ((__dp) = (__iter).dma + (__iter).curr), (__iter).sgp; \ (((__iter).curr += (__step)) >= (__iter).max) ? \ (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 7b84ebca2901..0ca40f6bf08c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -177,9 +177,22 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority | __NO_PREEMPTION; } +static inline bool need_preempt(int prio, int active) +{ + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + */ + return prio >= max(I915_PRIORITY_NORMAL, active); +} + static void kick_submission(struct intel_engine_cs *engine, int prio) { - const struct i915_request *inflight = *engine->execlists.active; + const struct i915_request *inflight = + execlists_active(&engine->execlists); /* * If we are already the currently executing context, don't @@ -188,7 +201,7 @@ static void kick_submission(struct intel_engine_cs *engine, int prio) * tasklet, i.e. we have not change the priority queue * sufficiently to oust the running context. */ - if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight))) + if (!inflight || !need_preempt(prio, rq_prio(inflight))) return; tasklet_hi_schedule(&engine->execlists.tasklet); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 7eefccff39bf..07d243acf553 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -52,22 +52,4 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } -static inline bool i915_scheduler_need_preempt(int prio, int active) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. 
- */ - return prio > max(I915_PRIORITY_NORMAL - 1, active); -} - #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index aad81acba9dc..d18e70550054 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -49,6 +49,15 @@ struct i915_sched_attr { * DAG of each request, we are able to insert it into a sorted queue when it * is ready, and are able to reorder its portion of the graph to accommodate * dynamic priority changes. + * + * Ok, there is now one active element to the "scheduler" in the backends. + * We let a new context run for a small amount of time before re-evaluating + * the run order. As we re-evaluate, we maintain the strict ordering of + * dependencies, but attempt to rotate the active contexts (the current context + * is put to the back of its priority queue, then reshuffling its dependents). + * This provides minimal timeslicing and prevents a userspace hog (e.g. + * something waiting on a user semaphore [VkEvent]) from denying service to + * others. */ struct i915_sched_node { struct list_head signalers_list; /* those before us, we depend upon */ diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 8508a01ad8b9..8812cdd9007f 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -28,6 +28,7 @@ #include "display/intel_fbc.h" #include "display/intel_gmbus.h" +#include "display/intel_vga.h" #include "i915_drv.h" #include "i915_reg.h" @@ -57,7 +58,7 @@ static void i915_restore_display(struct drm_i915_private *dev_priv) if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv)) I915_WRITE(FBC_CONTROL, dev_priv->regfile.saveFBC_CONTROL); - i915_redisable_vga(dev_priv); + intel_vga_redisable(dev_priv); } int i915_save_state(struct drm_i915_private *dev_priv) @@ -65,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - i915_save_display(dev_priv); if (IS_GEN(dev_priv, 4)) @@ -100,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i)); } - mutex_unlock(&dev_priv->drm.struct_mutex); - return 0; } @@ -110,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - if (IS_GEN(dev_priv, 4)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); @@ -145,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gmbus_reset(dev_priv); return 0; diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c new file mode 100644 index 000000000000..39c79e1c5b52 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/vga_switcheroo.h> + +#include "i915_drv.h" +#include "i915_switcheroo.h" + +static void i915_switcheroo_set_state(struct pci_dev *pdev, + enum vga_switcheroo_state state) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + pm_message_t pmm = { .event = PM_EVENT_SUSPEND }; + + if (!i915) { + dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); + return; + } + + if (state == 
VGA_SWITCHEROO_ON) { + pr_info("switched on\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* i915 resume handler doesn't set to D0 */ + pci_set_power_state(pdev, PCI_D0); + i915_resume_switcheroo(i915); + i915->drm.switch_power_state = DRM_SWITCH_POWER_ON; + } else { + pr_info("switched off\n"); + i915->drm.switch_power_state = DRM_SWITCH_POWER_CHANGING; + i915_suspend_switcheroo(i915, pmm); + i915->drm.switch_power_state = DRM_SWITCH_POWER_OFF; + } +} + +static bool i915_switcheroo_can_switch(struct pci_dev *pdev) +{ + struct drm_i915_private *i915 = pdev_to_i915(pdev); + + /* + * FIXME: open_count is protected by drm_global_mutex but that would lead to + * locking inversion with the driver load path. And the access here is + * completely racy anyway. So don't bother with locking for now. + */ + return i915 && i915->drm.open_count == 0; +} + +static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { + .set_gpu_state = i915_switcheroo_set_state, + .reprobe = NULL, + .can_switch = i915_switcheroo_can_switch, +}; + +int i915_switcheroo_register(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + return vga_switcheroo_register_client(pdev, &i915_switcheroo_ops, false); +} + +void i915_switcheroo_unregister(struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + vga_switcheroo_unregister_client(pdev); +} diff --git a/drivers/gpu/drm/i915/i915_switcheroo.h b/drivers/gpu/drm/i915/i915_switcheroo.h new file mode 100644 index 000000000000..59b6c1e07d75 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_switcheroo.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_SWITCHEROO__ +#define __I915_SWITCHEROO__ + +struct drm_i915_private; + +int i915_switcheroo_register(struct drm_i915_private *i915); +void i915_switcheroo_unregister(struct drm_i915_private *i915); + +#endif /* __I915_SWITCHEROO__ */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index d8a3b180c084..bf039b8ba593 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -30,6 +30,8 @@ #include <linux/stat.h> #include <linux/sysfs.h> +#include "gt/intel_rc6.h" + #include "i915_drv.h" #include "i915_sysfs.h" #include "intel_pm.h" @@ -49,7 +51,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv, u64 res = 0; with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) - res = intel_rc6_residency_us(dev_priv, reg); + res = intel_rc6_residency_us(&dev_priv->gt.rc6, reg); return DIV_ROUND_CLOSEST_ULL(res, 1000); } @@ -142,12 +144,12 @@ static const struct attribute_group media_rc6_attr_group = { }; #endif -static int l3_access_valid(struct drm_i915_private *dev_priv, loff_t offset) +static int l3_access_valid(struct drm_i915_private *i915, loff_t offset) { - if (!HAS_L3_DPF(dev_priv)) + if (!HAS_L3_DPF(i915)) return -EPERM; - if (offset % 4 != 0) + if (!IS_ALIGNED(offset, sizeof(u32))) return -EINVAL; if (offset >= GEN7_L3LOG_SIZE) @@ -162,31 +164,24 @@ i915_l3_read(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; int ret; - count = round_down(count, 4); - - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; 
+ count = round_down(count, sizeof(u32)); count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); + memset(buf, 0, count); - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - - if (dev_priv->l3_parity.remap_info[slice]) + spin_lock(&i915->gem.contexts.lock); + if (i915->l3_parity.remap_info[slice]) memcpy(buf, - dev_priv->l3_parity.remap_info[slice] + (offset/4), + i915->l3_parity.remap_info[slice] + offset / sizeof(u32), count); - else - memset(buf, 0, count); - - mutex_unlock(&dev->struct_mutex); + spin_unlock(&i915->gem.contexts.lock); return count; } @@ -197,46 +192,49 @@ i915_l3_write(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; - u32 **remap_info; + u32 *remap_info, *freeme = NULL; + struct i915_gem_context *ctx; int ret; - ret = l3_access_valid(dev_priv, offset); + ret = l3_access_valid(i915, offset); if (ret) return ret; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + if (count < sizeof(u32)) + return -EINVAL; - remap_info = &dev_priv->l3_parity.remap_info[slice]; - if (!*remap_info) { - *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!*remap_info) { - ret = -ENOMEM; - goto out; - } + remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!remap_info) + return -ENOMEM; + + spin_lock(&i915->gem.contexts.lock); + + if (i915->l3_parity.remap_info[slice]) { + freeme = remap_info; + remap_info = i915->l3_parity.remap_info[slice]; + } else { + i915->l3_parity.remap_info[slice] = remap_info; } - /* TODO: Ideally we really want a GPU reset here to make sure errors - * aren't propagated. Since I cannot find a stable way to reset the GPU - * at this point it is left as a TODO. - */ - memcpy(*remap_info + (offset/4), buf, count); + count = round_down(count, sizeof(u32)); + memcpy(remap_info + offset / sizeof(u32), buf, count); /* NB: We defer the remapping until we switch to the context */ - list_for_each_entry(ctx, &dev_priv->contexts.list, link) - ctx->remap_slice |= (1<<slice); + list_for_each_entry(ctx, &i915->gem.contexts.list, link) + ctx->remap_slice |= BIT(slice); - ret = count; + spin_unlock(&i915->gem.contexts.lock); + kfree(freeme); -out: - mutex_unlock(&dev->struct_mutex); + /* + * TODO: Ideally we really want a GPU reset here to make sure errors + * aren't propagated. Since I cannot find a stable way to reset the GPU + * at this point it is left as a TODO. 
+ */ - return ret; + return count; } static const struct bin_attribute dpf_attrs = { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 24f2944da09d..7ef7a1e1664c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->flags) + __entry->ctx, __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_request, @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno) + __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in, __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->prio, __entry->port) ); @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out, __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, - 
__entry->hw_id, __entry->ctx, __entry->seqno, - __entry->completed) + __entry->ctx, __entry->seqno, __entry->completed) ); #else @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin, */ TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->flags) ); @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) - __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->hw_id = ctx->hw_id; - __entry->vm = ctx->vm; + __entry->vm = rcu_access_pointer(ctx->vm); ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", - __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p", + __entry->dev, __entry->ctx, __entry->vm) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 76525543009a..e90c4d0af8fd 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -32,6 +32,7 @@ #include "i915_drv.h" #include "i915_globals.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" @@ -90,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref) return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT; } +__i915_active_call static void __i915_vma_retire(struct i915_active *ref) { i915_vma_put(active_to_vma(ref)); @@ -110,15 +112,15 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->vm = vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; vma->resv = obj->base.resv; vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - i915_active_init(vm->i915, &vma->active, - __i915_vma_active, __i915_vma_retire); + i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); /* Declare ourselves safe for use inside shrinkers */ if (IS_ENABLED(CONFIG_LOCKDEP)) { @@ -171,7 +173,7 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - vma->flags |= I915_VMA_GGTT; + __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } spin_lock(&obj->vma.lock); @@ -218,10 +220,6 @@ vma_create(struct drm_i915_gem_object *obj, spin_unlock(&obj->vma.lock); - mutex_lock(&vm->mutex); - list_add(&vma->vm_link, &vm->unbound_list); - mutex_unlock(&vm->mutex); - return vma; err_vma: @@ -265,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj, * Once created, the VMA is kept until either the object is freed, or the * address space is closed. * - * Must be called with struct_mutex held. - * * Returns the vma, or an error pointer. 
*/ struct i915_vma * @@ -277,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma *vma; GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(vm->closed); + GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); @@ -291,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma_work { + struct dma_fence_work base; + struct i915_vma *vma; + enum i915_cache_level cache_level; + unsigned int flags; +}; + +static int __vma_bind(struct dma_fence_work *work) +{ + struct i915_vma_work *vw = container_of(work, typeof(*vw), base); + struct i915_vma *vma = vw->vma; + int err; + + err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags); + if (err) + atomic_or(I915_VMA_ERROR, &vma->flags); + + if (vma->obj) + __i915_gem_object_unpin_pages(vma->obj); + + return err; +} + +static const struct dma_fence_work_ops bind_ops = { + .name = "bind", + .work = __vma_bind, +}; + +struct i915_vma_work *i915_vma_work(void) +{ + struct i915_vma_work *vw; + + vw = kzalloc(sizeof(*vw), GFP_KERNEL); + if (!vw) + return NULL; + + dma_fence_work_init(&vw->base, &bind_ops); + vw->base.dma.error = -EAGAIN; /* disable the worker by default */ + + return vw; +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map * @cache_level: mapping cache level * @flags: flags like global or local mapping + * @work: preallocated worker for allocating and binding the PTE * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. * Note that DMA addresses are also the only part of the SG table we care about. */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work) { u32 bind_flags; u32 vma_flags; @@ -319,13 +360,11 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (GEM_DEBUG_WARN_ON(!flags)) return -EINVAL; - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; + bind_flags = flags; + bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + vma_flags = atomic_read(&vma->flags); + vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; if (flags & PIN_UPDATE) bind_flags |= vma_flags; else @@ -336,11 +375,34 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - ret = vma->ops->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; + if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { + work->vma = vma; + work->cache_level = cache_level; + work->flags = bind_flags | I915_VMA_ALLOC; - vma->flags |= bind_flags; + /* + * Note we only want to chain up to the migration fence on + * the pages (not the object itself). As we don't track that, + * yet, we have to use the exclusive fence instead. + * + * Also note that we do not want to track the async vma as + * part of the obj->resv->excl_fence as it only affects + * execution and not content or object's backing store lifetime. 
+ */ + GEM_BUG_ON(i915_active_has_exclusive(&vma->active)); + i915_active_set_exclusive(&vma->active, &work->base.dma); + work->base.dma.error = 0; /* enable the queue_work() */ + + if (vma->obj) + __i915_gem_object_pin_pages(vma->obj); + } else { + GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags); + ret = vma->ops->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + } + + atomic_or(bind_flags, &vma->flags); return 0; } @@ -351,17 +413,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); - - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; } GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); - ptr = vma->iomap; + ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, @@ -371,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err; } - vma->iomap = ptr; + if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { + io_mapping_unmap(ptr); + ptr = vma->iomap; + } } __i915_vma_pin(vma); @@ -391,18 +454,12 @@ err: void i915_vma_flush_writes(struct i915_vma *vma) { - if (!i915_vma_has_ggtt_write(vma)) - return; - - intel_gt_flush_ggtt_writes(vma->vm->gt); - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } void i915_vma_unpin_iomap(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); i915_vma_flush_writes(vma); @@ -438,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if (!drm_mm_node_allocated(&vma->node)) return false; + if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) + return true; + if (vma->node.size < size) return true; @@ -472,17 +532,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; if (mappable && fenceable) - vma->flags |= I915_VMA_CAN_FENCE; + set_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); else - vma->flags &= ~I915_VMA_CAN_FENCE; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } -static bool color_differs(struct drm_mm_node *node, unsigned long color) -{ - return drm_mm_node_allocated(node) && node->color != color; -} - -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) { struct drm_mm_node *node = &vma->node; struct drm_mm_node *other; @@ -494,7 +549,7 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) * these constraints apply and set the drm_mm.color_adjust * appropriately. 
*/ - if (vma->vm->mm.color_adjust == NULL) + if (!i915_vm_has_cache_coloring(vma->vm)) return true; /* Only valid to be called on an already inserted vma */ @@ -502,11 +557,13 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level) GEM_BUG_ON(list_empty(&node->node_list)); other = list_prev_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(other)) + if (i915_node_color_differs(other, color) && + !drm_mm_hole_follows(other)) return false; other = list_next_entry(node, node_list); - if (color_differs(other, cache_level) && !drm_mm_hole_follows(node)) + if (i915_node_color_differs(other, color) && + !drm_mm_hole_follows(node)) return false; return true; @@ -541,13 +598,12 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj) static int i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = vma->vm->i915; - unsigned int cache_level; + unsigned long color; u64 start, end; int ret; GEM_BUG_ON(i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); @@ -567,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); + end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -583,35 +639,21 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return -ENOSPC; } - if (vma->obj) { - ret = i915_gem_object_pin_pages(vma->obj); - if (ret) - return ret; - - cache_level = vma->obj->cache_level; - } else { - cache_level = 0; - } - - GEM_BUG_ON(vma->pages); - - ret = vma->ops->set_pages(vma); - if (ret) - goto err_unpin; + color = 0; + if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || - range_overflows(offset, size, end)) { - ret = -EINVAL; - goto err_clear; - } + range_overflows(offset, size, end)) + return -EINVAL; ret = i915_gem_gtt_reserve(vma->vm, &vma->node, - size, offset, cache_level, + size, offset, color, flags); if (ret) - goto err_clear; + return ret; } else { /* * We only support huge gtt pages through the 48b PPGTT, @@ -647,20 +689,18 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } ret = i915_gem_gtt_insert(vma->vm, &vma->node, - size, alignment, cache_level, + size, alignment, color, start, end, flags); if (ret) - goto err_clear; + return ret; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); - mutex_unlock(&vma->vm->mutex); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); if (vma->obj) { atomic_inc(&vma->obj->bind_count); @@ -668,27 +708,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } return 0; - -err_clear: - vma->ops->clear_pages(vma); -err_unpin: - if (vma->obj) - 
i915_gem_object_unpin_pages(vma->obj); - return ret; } static void i915_vma_remove(struct i915_vma *vma) { GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - - vma->ops->clear_pages(vma); + GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - mutex_lock(&vma->vm->mutex); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); + list_del(&vma->vm_link); /* * Since the unbound list is global, only move to that list if @@ -697,61 +725,222 @@ i915_vma_remove(struct i915_vma *vma) if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; - atomic_dec(&obj->bind_count); - /* * And finally now the object is completely decoupled from this * vma, we can drop its hold on the backing storage and allow * it to be reaped by the shrinker. */ - i915_gem_object_unpin_pages(obj); + atomic_dec(&obj->bind_count); assert_bind_count(obj); } + + drm_mm_remove_node(&vma->node); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) +static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) { - const unsigned int bound = vma->flags; - int ret; + unsigned int bound; + bool pinned = true; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + bound = atomic_read(&vma->flags); + do { + if (unlikely(flags & ~bound)) + return false; - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err_unpin; + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) + return false; + + if (!(bound & I915_VMA_PIN_MASK)) + goto unpinned; + + GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + + return true; + +unpinned: + /* + * If pin_count==0, but we are bound, check under the lock to avoid + * racing with a concurrent i915_vma_unbind(). + */ + mutex_lock(&vma->vm->mutex); + do { + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { + pinned = false; + break; + } + + if (unlikely(flags & ~bound)) { + pinned = false; + break; + } + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + mutex_unlock(&vma->vm->mutex); + + return pinned; +} + +static int vma_get_pages(struct i915_vma *vma) +{ + int err = 0; + + if (atomic_add_unless(&vma->pages_count, 1, 0)) + return 0; + + /* Allocations ahoy! 
*/ + if (mutex_lock_interruptible(&vma->pages_mutex)) + return -EINTR; + + if (!atomic_read(&vma->pages_count)) { + if (vma->obj) { + err = i915_gem_object_pin_pages(vma->obj); + if (err) + goto unlock; + } + + err = vma->ops->set_pages(vma); + if (err) { + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); + goto unlock; + } } + atomic_inc(&vma->pages_count); - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err_unpin; +unlock: + mutex_unlock(&vma->pages_mutex); + + return err; +} + +static void __vma_put_pages(struct i915_vma *vma, unsigned int count) +{ + /* We allocate under vma_get_pages, so beware the shrinker */ + mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(atomic_read(&vma->pages_count) < count); + if (atomic_sub_return(count, &vma->pages_count) == 0) { + vma->ops->clear_pages(vma); + GEM_BUG_ON(vma->pages); + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); } - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + mutex_unlock(&vma->pages_mutex); +} - ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags); - if (ret) - goto err_remove; +static void vma_put_pages(struct i915_vma *vma) +{ + if (atomic_add_unless(&vma->pages_count, -1, 1)) + return; + + __vma_put_pages(vma, 1); +} + +static void vma_unbind_pages(struct i915_vma *vma) +{ + unsigned int count; + + lockdep_assert_held(&vma->vm->mutex); + + /* The upper portion of pages_count is the number of bindings */ + count = atomic_read(&vma->pages_count); + count >>= I915_VMA_PAGES_BIAS; + GEM_BUG_ON(!count); + + __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); +} + +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_vma_work *work = NULL; + unsigned int bound; + int err; + + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + GEM_BUG_ON(flags & PIN_UPDATE); + GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); + + /* First try and grab the pin without rebinding the vma */ + if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) + return 0; + + err = vma_get_pages(vma); + if (err) + return err; + + if (flags & vma->vm->bind_async_flags) { + work = i915_vma_work(); + if (!work) { + err = -ENOMEM; + goto err_pages; + } + } - GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0); + /* No more allocations allowed once we hold vm->mutex */ + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + goto err_fence; + + bound = atomic_read(&vma->flags); + if (unlikely(bound & I915_VMA_ERROR)) { + err = -ENOMEM; + goto err_unlock; + } + + if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { + err = -EAGAIN; /* pins are meant to be fairly temporary */ + goto err_unlock; + } - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); + if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) { + __i915_vma_pin(vma); + goto err_unlock; + } + err = i915_active_acquire(&vma->active); + if (err) + goto err_unlock; + + if (!(bound & I915_VMA_BIND_MASK)) { + err = i915_vma_insert(vma, size, alignment, flags); + if (err) + goto err_active; + + if (i915_is_ggtt(vma->vm)) + __i915_vma_set_map_and_fenceable(vma); + } + + GEM_BUG_ON(!vma->pages); + err = i915_vma_bind(vma, + vma->obj ? 
vma->obj->cache_level : 0, + flags, work); + if (err) + goto err_remove; + + /* There should only be at most 2 active bindings (user, global) */ + GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound); + atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + + __i915_vma_pin(vma); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_bound(vma, flags)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; err_remove: - if ((bound & I915_VMA_BIND_MASK) == 0) { + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) i915_vma_remove(vma); - GEM_BUG_ON(vma->pages); - GEM_BUG_ON(vma->flags & I915_VMA_BIND_MASK); - } -err_unpin: - __i915_vma_unpin(vma); - return ret; +err_active: + i915_active_release(&vma->active); +err_unlock: + mutex_unlock(&vma->vm->mutex); +err_fence: + if (work) + dma_fence_work_commit(&work->base); +err_pages: + vma_put_pages(vma); + return err; } void i915_vma_close(struct i915_vma *vma) @@ -782,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - if (!i915_vma_is_closed(vma)) - return; - spin_lock_irq(&i915->gt.closed_lock); list_del_init(&vma->closed_link); spin_unlock_irq(&i915->gt.closed_lock); @@ -792,44 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) void i915_vma_reopen(struct i915_vma *vma) { - __i915_vma_remove_closed(vma); + if (i915_vma_is_closed(vma)) + __i915_vma_remove_closed(vma); } -static void __i915_vma_destroy(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->fence); - - mutex_lock(&vma->vm->mutex); - list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); + if (drm_mm_node_allocated(&vma->node)) { + mutex_lock(&vma->vm->mutex); + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + } + GEM_BUG_ON(i915_vma_is_active(vma)); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; spin_lock(&obj->vma.lock); list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma.tree); + rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); } - i915_active_fini(&vma->active); - - i915_vma_free(vma); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - - GEM_BUG_ON(i915_vma_is_pinned(vma)); - __i915_vma_remove_closed(vma); + i915_vm_put(vma->vm); - WARN_ON(i915_vma_unbind(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); - - __i915_vma_destroy(vma); + i915_active_fini(&vma->active); + i915_vma_free(vma); } void i915_vma_parked(struct drm_i915_private *i915) @@ -838,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915) spin_lock_irq(&i915->gt.closed_lock); list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { - list_del_init(&vma->closed_link); + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + + /* XXX All to avoid keeping a reference on i915_vma itself */ + + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + if (!i915_vm_tryopen(vm)) { + i915_gem_object_put(obj); + obj = NULL; + } + spin_unlock_irq(&i915->gt.closed_lock); - i915_vma_destroy(vma); + if (obj) { + i915_vma_destroy(vma); + i915_gem_object_put(obj); + } + + i915_vm_close(vm); + /* Restart after dropping lock */ spin_lock_irq(&i915->gt.closed_lock); + next = 
list_first_entry(&i915->gt.closed_vma, + typeof(*next), closed_link); } spin_unlock_irq(&i915->gt.closed_lock); } @@ -883,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + /* Wait for the vma to be bound before we start! */ + err = i915_request_await_active(rq, &vma->active); + if (err) + return err; + + return i915_active_add_request(&vma->active, rq); +} + int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) @@ -890,27 +1101,15 @@ int i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; int err; - assert_vma_held(vma); assert_object_held(obj); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - /* - * Add a reference if we're newly entering the active list. - * The order in which we add operations to the retirement queue is - * vital here: mark_active adds to the start of the callback list, - * such that subsequent callbacks are called first. Therefore we - * add the active reference first and queue for it to be dropped - * *last*. - */ - err = i915_active_ref(&vma->active, rq->timeline, rq); + err = __i915_vma_move_to_active(vma, rq); if (unlikely(err)) return err; if (flags & EXEC_OBJECT_WRITE) { if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS)) - i915_active_ref(&obj->frontbuffer->write, - rq->timeline, - rq); + i915_active_add_request(&obj->frontbuffer->write, rq); dma_resv_add_excl_fence(vma->resv, &rq->fence); obj->write_domain = I915_GEM_DOMAIN_RENDER; @@ -930,44 +1129,31 @@ int i915_vma_move_to_active(struct i915_vma *vma, return 0; } -int i915_vma_unbind(struct i915_vma *vma) +int __i915_vma_unbind(struct i915_vma *vma) { int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); /* * First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. + * + * XXX Actually waiting under the vm->mutex is a hinderance and + * should be pipelined wherever possible. In cases where that is + * unavoidable, we should lift the wait to before the mutex. */ - might_sleep(); - if (i915_vma_is_active(vma)) { - /* - * When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). 
- */ - __i915_vma_pin(vma); - ret = i915_active_wait(&vma->active); - __i915_vma_unpin(vma); - if (ret) - return ret; - } - GEM_BUG_ON(i915_vma_is_active(vma)); + ret = i915_vma_sync(vma); + if (ret) + return ret; + GEM_BUG_ON(i915_vma_is_active(vma)); if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EBUSY; } + GEM_BUG_ON(i915_vma_is_active(vma)); if (!drm_mm_node_allocated(&vma->node)) return 0; @@ -982,34 +1168,46 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); /* release the fence reg _after_ flushing */ - mutex_lock(&vma->vm->mutex); ret = i915_vma_revoke_fence(vma); - mutex_unlock(&vma->vm->mutex); if (ret) return ret; /* Force a pagefault for domain tracking on next user access */ - mutex_lock(&vma->vm->mutex); i915_vma_revoke_mmap(vma); - mutex_unlock(&vma->vm->mutex); __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; + clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(!vma->vm->closed)) { + if (likely(atomic_read(&vma->vm->open))) { trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma); } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); + vma_unbind_pages(vma); i915_vma_remove(vma); return 0; } +int i915_vma_unbind(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + int err; + + err = mutex_lock_interruptible(&vm->mutex); + if (err) + return err; + + err = __i915_vma_unbind(vma); + mutex_unlock(&vm->mutex); + + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index af2ef0a51455..858908e3d1cc 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -72,7 +72,7 @@ struct i915_vma { * that exist in the ctx->handle_vmas LUT for this vma. */ atomic_t open_count; - unsigned long flags; + atomic_t flags; /** * How many users have pinned this object in GTT space. * @@ -96,22 +96,41 @@ struct i915_vma { * exclusive cachelines of a single page, so a maximum of 64 possible * users. 
*/ -#define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW BIT(8) +#define I915_VMA_PIN_MASK 0x3ff +#define I915_VMA_OVERFLOW 0x200 /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(9) -#define I915_VMA_LOCAL_BIND BIT(10) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) +#define I915_VMA_GLOBAL_BIND_BIT 10 +#define I915_VMA_LOCAL_BIND_BIT 11 -#define I915_VMA_GGTT BIT(11) -#define I915_VMA_CAN_FENCE BIT(12) -#define I915_VMA_USERFAULT_BIT 13 -#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) -#define I915_VMA_GGTT_WRITE BIT(14) +#define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) +#define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) + +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) + +#define I915_VMA_ALLOC_BIT 12 +#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) + +#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) + +#define I915_VMA_GGTT_BIT 14 +#define I915_VMA_CAN_FENCE_BIT 15 +#define I915_VMA_USERFAULT_BIT 16 +#define I915_VMA_GGTT_WRITE_BIT 17 + +#define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) +#define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) +#define I915_VMA_USERFAULT ((int)BIT(I915_VMA_USERFAULT_BIT)) +#define I915_VMA_GGTT_WRITE ((int)BIT(I915_VMA_GGTT_WRITE_BIT)) struct i915_active active; +#define I915_VMA_PAGES_BIAS 24 +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) + atomic_t pages_count; /* number of active binds to the pages */ + struct mutex pages_mutex; /* protect acquire/release of backing pages */ + /** * Support different GGTT views into the same object. * This means there can be multiple VMA mappings per object and per VM. @@ -158,52 +177,57 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) return !i915_active_is_idle(&vma->active); } +int __must_check __i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq); int __must_check i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags); +#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter) + static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT; + return test_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) { - return vma->flags & I915_VMA_GGTT_WRITE; + return test_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - vma->flags |= I915_VMA_GGTT_WRITE; + set_bit(I915_VMA_GGTT_WRITE_BIT, __i915_vma_flags(vma)); } -static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +static inline bool i915_vma_unset_ggtt_write(struct i915_vma *vma) { - vma->flags &= ~I915_VMA_GGTT_WRITE; + return test_and_clear_bit(I915_VMA_GGTT_WRITE_BIT, + __i915_vma_flags(vma)); } void i915_vma_flush_writes(struct i915_vma *vma); static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { - return vma->flags & I915_VMA_CAN_FENCE; + return test_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_set_userfault(struct i915_vma *vma) { GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); - return __test_and_set_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_and_set_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline void i915_vma_unset_userfault(struct i915_vma *vma) { - 
return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return clear_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_has_userfault(const struct i915_vma *vma) { - return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags); + return test_bit(I915_VMA_USERFAULT_BIT, __i915_vma_flags(vma)); } static inline bool i915_vma_is_closed(const struct i915_vma *vma) @@ -293,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma, return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); -bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +struct i915_vma_work *i915_vma_work(void); +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work); + +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -318,30 +347,12 @@ static inline void i915_vma_unlock(struct i915_vma *vma) dma_resv_unlock(vma->resv); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. 
- */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) { - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - } - - return __i915_vma_do_pin(vma, size, alignment, flags); -} +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { - return vma->flags & I915_VMA_PIN_MASK; + return atomic_read(&vma->flags) & I915_VMA_PIN_MASK; } static inline bool i915_vma_is_pinned(const struct i915_vma *vma) @@ -351,18 +362,18 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); + atomic_inc(&vma->flags); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); } static inline void __i915_vma_unpin(struct i915_vma *vma) { - vma->flags--; + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + atomic_dec(&vma->flags); } static inline void i915_vma_unpin(struct i915_vma *vma) { - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); __i915_vma_unpin(vma); } @@ -370,7 +381,13 @@ static inline void i915_vma_unpin(struct i915_vma *vma) static inline bool i915_vma_is_bound(const struct i915_vma *vma, unsigned int where) { - return vma->flags & where; + return atomic_read(&vma->flags) & where; +} + +static inline bool i915_node_color_differs(const struct drm_mm_node *node, + unsigned long color) +{ + return drm_mm_node_allocated(node) && node->color != color; } /** @@ -382,8 +399,6 @@ static inline bool i915_vma_is_bound(const struct i915_vma *vma, * the caller must call i915_vma_unpin_iomap to relinquish the pinning * after the iomapping is no longer required. * - * Callers must hold the struct_mutex. - * * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); @@ -395,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + * This function is only valid to be called on a VMA previously + * iomapped by the caller with i915_vma_pin_iomap(). 
*/ void i915_vma_unpin_iomap(struct i915_vma *vma); @@ -424,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) int __must_check i915_vma_pin_fence(struct i915_vma *vma); int __must_check i915_vma_revoke_fence(struct i915_vma *vma); +int __i915_vma_pin_fence(struct i915_vma *vma); + static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0); @@ -441,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { - /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */ if (vma->fence) __i915_vma_unpin_fence(vma); } @@ -470,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma *vma); void i915_vma_make_purgeable(struct i915_vma *vma); +static inline int i915_vma_sync(struct i915_vma *vma) +{ + /* Wait for the asynchronous bindings and pending GPU reads */ + return i915_active_wait(&vma->active); +} + #endif diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 546577e39b4e..09870a31b4f0 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -44,8 +44,8 @@ #define TGL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(TGL_CSR_PATH); -#define ICL_CSR_PATH "i915/icl_dmc_ver1_07.bin" -#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) +#define ICL_CSR_PATH "i915/icl_dmc_ver1_09.bin" +#define ICL_CSR_VERSION_REQUIRED CSR_VERSION(1, 9) #define ICL_CSR_MAX_FW_SIZE 0x6000 MODULE_FIRMWARE(ICL_CSR_PATH); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d0ed44d33484..85e480bdc673 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -93,9 +93,9 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) hweight8(sseu->slice_mask), sseu->slice_mask); drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslices, mask=%04x\n", + drm_printf(p, "slice%d: %u subslices, mask=%08x\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); @@ -118,10 +118,9 @@ void intel_device_info_dump_runtime(const struct intel_runtime_info *info, static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, int subslice) { - int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); - int slice_stride = sseu->max_subslices * subslice_stride; + int slice_stride = sseu->max_subslices * sseu->eu_stride; - return slice * slice_stride + subslice * subslice_stride; + return slice * slice_stride + subslice * sseu->eu_stride; } static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, @@ -130,7 +129,7 @@ static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int i, offset = sseu_eu_idx(sseu, slice, subslice); u16 eu_mask = 0; - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + for (i = 0; i < sseu->eu_stride; i++) { eu_mask |= ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE); } @@ -143,7 +142,7 @@ static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, { int i, offset = sseu_eu_idx(sseu, slice, subslice); - for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) { + 
for (i = 0; i < sseu->eu_stride; i++) { sseu->eu_mask[offset + i] = (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; } @@ -160,9 +159,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, } for (s = 0; s < sseu->max_slices; s++) { - drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", + drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n", s, intel_sseu_subslices_per_slice(sseu, s), - sseu->subslice_mask[s]); + intel_sseu_get_subslices(sseu, s)); for (ss = 0; ss < sseu->max_subslices; ss++) { u16 enabled_eus = sseu_get_eus(sseu, s, ss); @@ -183,44 +182,80 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; - if (IS_ELKHARTLAKE(dev_priv)) { - sseu->max_slices = 1; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; - } else { - sseu->max_slices = 1; - sseu->max_subslices = 8; - sseu->max_eus_per_subslice = 8; - } + if (IS_ELKHARTLAKE(dev_priv)) + intel_sseu_set_info(sseu, 1, 4, 8); + else + intel_sseu_set_info(sseu, 1, 8, 8); s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; - for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu->subslice_mask[s] & BIT(ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. 
*/ sseu->has_slice_pg = 1; @@ -236,23 +271,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) const int eu_mask = 0xff; u32 subslice_mask, eu_en; + intel_sseu_set_info(sseu, 6, 4, 8); + sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->max_slices = 6; - sseu->max_subslices = 4; - sseu->max_eus_per_subslice = 8; - - subslice_mask = (1 << 4) - 1; - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); - - /* - * Slice0 can have up to 3 subslices, but there are only 2 in - * slice1/2. - */ - sseu->subslice_mask[0] = subslice_mask; - for (s = 1; s < sseu->max_slices; s++) - sseu->subslice_mask[s] = subslice_mask & 0x3; /* Slice0 */ eu_en = ~I915_READ(GEN8_EU_DISABLE0); @@ -277,14 +299,25 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) eu_en = ~I915_READ(GEN10_EU_DISABLE3); sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); - /* Do a second pass where we mark the subslices disabled if all their - * eus are off. - */ + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + for (s = 0; s < sseu->max_slices; s++) { + u32 subslice_mask_with_eus = subslice_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { if (sseu_get_eus(sseu, s, ss) == 0) - sseu->subslice_mask[s] &= ~BIT(ss); + subslice_mask_with_eus &= ~BIT(ss); } + + /* + * Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. + */ + intel_sseu_set_subslices(sseu, s, s == 0 ? + subslice_mask_with_eus : + subslice_mask_with_eus & 0x3); } sseu->eu_total = compute_eu_total(sseu); @@ -310,13 +343,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse; + u8 subslice_mask = 0; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); - sseu->max_slices = 1; - sseu->max_subslices = 2; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 1, 2, 8); if (!(fuse & CHV_FGT_DISABLE_SS0)) { u8 disabled_mask = @@ -325,7 +357,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(0); + subslice_mask |= BIT(0); sseu_set_eus(sseu, 0, 0, ~disabled_mask); } @@ -336,10 +368,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); - sseu->subslice_mask[0] |= BIT(1); + subslice_mask |= BIT(1); sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + intel_sseu_set_subslices(sseu, 0, subslice_mask); + sseu->eu_total = compute_eu_total(sseu); /* @@ -372,9 +406,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; /* BXT has a single slice and at most 3 subslices. */ - sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; - sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3, + IS_GEN9_LP(dev_priv) ? 3 : 4, 8); /* * The subslice disable field is global, i.e. 
it applies @@ -393,14 +426,14 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); eu_disable = I915_READ(GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; u8 eu_disabled_mask; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; @@ -473,9 +506,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; - sseu->max_slices = 3; - sseu->max_subslices = 3; - sseu->max_eus_per_subslice = 8; + intel_sseu_set_info(sseu, 3, 3, 8); /* * The subslice disable field is global, i.e. it applies @@ -502,13 +533,13 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) /* skip disabled slice */ continue; - sseu->subslice_mask[s] = subslice_mask; + intel_sseu_set_subslices(sseu, s, subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask[s] & BIT(ss))) + if (!intel_sseu_has_subslice(sseu, s, ss)) /* skip disabled subslice */ continue; @@ -552,6 +583,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u32 fuse1; + u8 subslice_mask = 0; int s, ss; /* @@ -564,22 +596,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) /* fall through */ case 1: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0); + subslice_mask = BIT(0); break; case 2: sseu->slice_mask = BIT(0); - sseu->subslice_mask[0] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; case 3: sseu->slice_mask = BIT(0) | BIT(1); - sseu->subslice_mask[0] = BIT(0) | BIT(1); - sseu->subslice_mask[1] = BIT(0) | BIT(1); + subslice_mask = BIT(0) | BIT(1); break; } - sseu->max_slices = hweight8(sseu->slice_mask); - sseu->max_subslices = hweight8(sseu->subslice_mask[0]); - fuse1 = I915_READ(HSW_PAVP_FUSE1); switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) { default: @@ -596,9 +624,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) sseu->eu_per_subslice = 6; break; } - sseu->max_eus_per_subslice = sseu->eu_per_subslice; + + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask), + hweight8(subslice_mask), + sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { + intel_sseu_set_subslices(sseu, s, subslice_mask); + for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, (1UL << sseu->eu_per_subslice) - 1); @@ -900,12 +933,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->num_sprites[pipe] = 1; } - if (i915_modparams.disable_display) { - DRM_INFO("Display disabled (module parameter)\n"); - info->num_pipes = 0; - } else if (HAS_DISPLAY(dev_priv) && - (IS_GEN_RANGE(dev_priv, 7, 8)) && - HAS_PCH_SPLIT(dev_priv)) { + if (HAS_DISPLAY(dev_priv) && IS_GEN_RANGE(dev_priv, 7, 8) && + HAS_PCH_SPLIT(dev_priv)) { u32 fuse_strap = I915_READ(FUSE_STRAP); u32 sfuse_strap = I915_READ(SFUSE_STRAP); @@ -923,14 +952,14 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) (HAS_PCH_CPT(dev_priv) && !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) { DRM_INFO("Display fused off, disabling\n"); - info->num_pipes = 0; + info->pipe_mask = 0; } else if (fuse_strap & IVB_PIPE_C_DISABLE) { DRM_INFO("PipeC 
fused off\n"); - info->num_pipes -= 1; + info->pipe_mask &= ~BIT(PIPE_C); } } else if (HAS_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 9) { u32 dfsm = I915_READ(SKL_DFSM); - u8 enabled_mask = BIT(info->num_pipes) - 1; + u8 enabled_mask = info->pipe_mask; if (dfsm & SKL_DFSM_PIPE_A_DISABLE) enabled_mask &= ~BIT(PIPE_A); @@ -951,7 +980,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) DRM_ERROR("invalid pipe fuse configuration: enabled_mask=0x%x\n", enabled_mask); else - info->num_pipes = hweight8(enabled_mask); + info->pipe_mask = enabled_mask; } /* Initialize slice/subslice/EU info */ @@ -965,8 +994,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); @@ -1010,8 +1041,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEN11_GT_VEBOX_DISABLE_SHIFT; for (i = 0; i < I915_MAX_VCS; i++) { - if (!HAS_ENGINE(dev_priv, _VCS(i))) + if (!HAS_ENGINE(dev_priv, _VCS(i))) { + vdbox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vdbox_mask)) { info->engine_mask &= ~BIT(_VCS(i)); @@ -1032,8 +1065,10 @@ void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) GEM_BUG_ON(vdbox_mask != VDBOX_MASK(dev_priv)); for (i = 0; i < I915_MAX_VECS; i++) { - if (!HAS_ENGINE(dev_priv, _VECS(i))) + if (!HAS_ENGINE(dev_priv, _VECS(i))) { + vebox_mask &= ~BIT(i); continue; + } if (!(BIT(i) & vebox_mask)) { info->engine_mask &= ~BIT(_VECS(i)); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 92e0c2e0954c..e9940f932d26 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -135,6 +135,7 @@ enum intel_ppgtt_type { func(has_csr); \ func(has_ddi); \ func(has_dp_mst); \ + func(has_dsb); \ func(has_fbc); \ func(has_gmch); \ func(has_hotplug); \ @@ -159,9 +160,11 @@ struct intel_device_info { unsigned int page_sizes; /* page sizes supported by the HW */ + u32 memory_regions; /* regions supported by the HW */ + u32 display_mmio_offset; - u8 num_pipes; + u8 pipe_mask; #define DEFINE_FLAG(name) u8 name:1 DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c new file mode 100644 index 000000000000..72f98a111de1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "intel_memory_region.h" +#include "i915_drv.h" + +/* XXX: Hysterical raisins. BIT(inst) needs to just be (inst) at some point. 
*/ +#define REGION_MAP(type, inst) \ + BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst) + +const u32 intel_region_map[] = { + [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0), + [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0), + [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), +}; + +static u64 +intel_memory_region_free_pages(struct intel_memory_region *mem, + struct list_head *blocks) +{ + struct i915_buddy_block *block, *on; + u64 size = 0; + + list_for_each_entry_safe(block, on, blocks, link) { + size += i915_buddy_block_size(&mem->mm, block); + i915_buddy_free(&mem->mm, block); + } + INIT_LIST_HEAD(blocks); + + return size; +} + +void +__intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks) +{ + mutex_lock(&mem->mm_lock); + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); +} + +void +__intel_memory_region_put_block_buddy(struct i915_buddy_block *block) +{ + struct list_head blocks; + + INIT_LIST_HEAD(&blocks); + list_add(&block->link, &blocks); + __intel_memory_region_put_pages_buddy(block->private, &blocks); +} + +int +__intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags, + struct list_head *blocks) +{ + unsigned int min_order = 0; + unsigned long n_pages; + + GEM_BUG_ON(!IS_ALIGNED(size, mem->mm.chunk_size)); + GEM_BUG_ON(!list_empty(blocks)); + + if (flags & I915_ALLOC_MIN_PAGE_SIZE) { + min_order = ilog2(mem->min_page_size) - + ilog2(mem->mm.chunk_size); + } + + if (flags & I915_ALLOC_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_order = ilog2(size) - ilog2(mem->mm.chunk_size); + } + + n_pages = size >> ilog2(mem->mm.chunk_size); + + mutex_lock(&mem->mm_lock); + + do { + struct i915_buddy_block *block; + unsigned int order; + + order = fls(n_pages) - 1; + GEM_BUG_ON(order > mem->mm.max_order); + GEM_BUG_ON(order < min_order); + + do { + block = i915_buddy_alloc(&mem->mm, order); + if (!IS_ERR(block)) + break; + + if (order-- == min_order) + goto err_free_blocks; + } while (1); + + n_pages -= BIT(order); + + block->private = mem; + list_add(&block->link, blocks); + + if (!n_pages) + break; + } while (1); + + mutex_unlock(&mem->mm_lock); + return 0; + +err_free_blocks: + intel_memory_region_free_pages(mem, blocks); + mutex_unlock(&mem->mm_lock); + return -ENXIO; +} + +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct i915_buddy_block *block; + LIST_HEAD(blocks); + int ret; + + ret = __intel_memory_region_get_pages_buddy(mem, size, flags, &blocks); + if (ret) + return ERR_PTR(ret); + + block = list_first_entry(&blocks, typeof(*block), link); + list_del_init(&block->link); + return block; +} + +int intel_memory_region_init_buddy(struct intel_memory_region *mem) +{ + return i915_buddy_init(&mem->mm, resource_size(&mem->region), + PAGE_SIZE); +} + +void intel_memory_region_release_buddy(struct intel_memory_region *mem) +{ + i915_buddy_fini(&mem->mm); +} + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops) +{ + struct intel_memory_region *mem; + int err; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + mem->i915 = i915; + mem->region = (struct resource)DEFINE_RES_MEM(start, size); + 
mem->io_start = io_start; + mem->min_page_size = min_page_size; + mem->ops = ops; + + mutex_init(&mem->objects.lock); + INIT_LIST_HEAD(&mem->objects.list); + INIT_LIST_HEAD(&mem->objects.purgeable); + + mutex_init(&mem->mm_lock); + + if (ops->init) { + err = ops->init(mem); + if (err) + goto err_free; + } + + kref_init(&mem->kref); + return mem; + +err_free: + kfree(mem); + return ERR_PTR(err); +} + +static void __intel_memory_region_destroy(struct kref *kref) +{ + struct intel_memory_region *mem = + container_of(kref, typeof(*mem), kref); + + if (mem->ops->release) + mem->ops->release(mem); + + mutex_destroy(&mem->mm_lock); + mutex_destroy(&mem->objects.lock); + kfree(mem); +} + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem) +{ + kref_get(&mem->kref); + return mem; +} + +void intel_memory_region_put(struct intel_memory_region *mem) +{ + kref_put(&mem->kref, __intel_memory_region_destroy); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_memory_region.c" +#include "selftests/mock_region.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h new file mode 100644 index 000000000000..49b059a2be70 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_MEMORY_REGION_H__ +#define __INTEL_MEMORY_REGION_H__ + +#include <linux/kref.h> +#include <linux/ioport.h> +#include <linux/mutex.h> +#include <linux/io-mapping.h> + +#include "i915_buddy.h" + +struct drm_i915_private; +struct drm_i915_gem_object; +struct intel_memory_region; +struct sg_table; + +/** + * Base memory type + */ +enum intel_memory_type { + INTEL_MEMORY_SYSTEM = 0, + INTEL_MEMORY_LOCAL, + INTEL_MEMORY_STOLEN, +}; + +enum intel_region_id { + INTEL_REGION_SMEM = 0, + INTEL_REGION_LMEM, + INTEL_REGION_STOLEN, + INTEL_REGION_UNKNOWN, /* Should be last */ +}; + +#define REGION_SMEM BIT(INTEL_REGION_SMEM) +#define REGION_LMEM BIT(INTEL_REGION_LMEM) +#define REGION_STOLEN BIT(INTEL_REGION_STOLEN) + +#define INTEL_MEMORY_TYPE_SHIFT 16 + +#define MEMORY_TYPE_FROM_REGION(r) (ilog2((r) >> INTEL_MEMORY_TYPE_SHIFT)) +#define MEMORY_INSTANCE_FROM_REGION(r) (ilog2((r) & 0xffff)) + +#define I915_ALLOC_MIN_PAGE_SIZE BIT(0) +#define I915_ALLOC_CONTIGUOUS BIT(1) + +/** + * Memory regions encoded as type | instance + */ +extern const u32 intel_region_map[]; + +struct intel_memory_region_ops { + unsigned int flags; + + int (*init)(struct intel_memory_region *mem); + void (*release)(struct intel_memory_region *mem); + + struct drm_i915_gem_object * + (*create_object)(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +}; + +struct intel_memory_region { + struct drm_i915_private *i915; + + const struct intel_memory_region_ops *ops; + + struct io_mapping iomap; + struct resource region; + + struct i915_buddy_mm mm; + struct mutex mm_lock; + + struct kref kref; + + resource_size_t io_start; + resource_size_t min_page_size; + + unsigned int type; + unsigned int instance; + unsigned int id; + + struct { + struct mutex lock; /* Protects access to objects */ + struct list_head list; + struct list_head purgeable; + } objects; +}; + +int intel_memory_region_init_buddy(struct intel_memory_region *mem); +void intel_memory_region_release_buddy(struct intel_memory_region *mem); + +int __intel_memory_region_get_pages_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int 
flags, + struct list_head *blocks); +struct i915_buddy_block * +__intel_memory_region_get_block_buddy(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags); +void __intel_memory_region_put_pages_buddy(struct intel_memory_region *mem, + struct list_head *blocks); +void __intel_memory_region_put_block_buddy(struct i915_buddy_block *block); + +struct intel_memory_region * +intel_memory_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start, + const struct intel_memory_region_ops *ops); + +struct intel_memory_region * +intel_memory_region_get(struct intel_memory_region *mem); +void intel_memory_region_put(struct intel_memory_region *mem); + +#endif diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index fa864d8f2b73..1035d3d46fd8 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; case INTEL_PCH_CMP_DEVICE_ID_TYPE: + case INTEL_PCH_CMP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n"); WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ @@ -78,7 +79,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_ICELAKE(dev_priv)); return PCH_ICP; case INTEL_PCH_MCC_DEVICE_ID_TYPE: - case INTEL_PCH_MCC2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n"); WARN_ON(!IS_ELKHARTLAKE(dev_priv)); return PCH_MCC; @@ -86,6 +86,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Tiger Lake LP PCH\n"); WARN_ON(!IS_TIGERLAKE(dev_priv)); return PCH_TGP; + case INTEL_PCH_JSP_DEVICE_ID_TYPE: + case INTEL_PCH_JSP2_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Jasper Lake PCH\n"); + WARN_ON(!IS_ELKHARTLAKE(dev_priv)); + return PCH_JSP; default: return PCH_NONE; } diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index e6a2d65f19c6..f4dc18c34291 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -23,6 +23,7 @@ enum intel_pch { PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ PCH_CNP, /* Cannon/Comet Lake PCH */ PCH_ICP, /* Ice Lake PCH */ + PCH_JSP, /* Jasper Lake PCH */ PCH_MCC, /* Mule Creek Canyon PCH */ PCH_TGP, /* Tiger Lake PCH */ }; @@ -41,16 +42,19 @@ enum intel_pch { #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 +#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 -#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 +#define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 +#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type) #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) +#define HAS_PCH_JSP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_JSP) #define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC) #define HAS_PCH_TGP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_TGP) #define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) diff --git 
a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 75ee027abb80..362234449087 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -25,7 +25,6 @@ * */ -#include <linux/cpufreq.h> #include <linux/module.h> #include <linux/pm_runtime.h> @@ -38,6 +37,8 @@ #include "display/intel_fbc.h" #include "display/intel_sprite.h" +#include "gt/intel_llc.h" + #include "i915_drv.h" #include "i915_irq.h" #include "i915_trace.h" @@ -45,26 +46,6 @@ #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" -/** - * DOC: RC6 - * - * RC6 is a special power stage which allows the GPU to enter an very - * low-voltage mode when idle, using down to 0V while at this stage. This - * stage is entered automatically when the GPU is idle when RC6 support is - * enabled, and as soon as new workload arises GPU wakes up automatically as well. - * - * There are different RC6 modes available in Intel GPU, which differentiate - * among each other with the latency required to enter and leave RC6 and - * voltage consumed by the GPU in different states. - * - * The combination of the following flags define which states GPU is allowed - * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and - * RC6pp is deepest RC6. Their support by hardware varies according to the - * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one - * which brings the most power savings; deeper states save more power, but - * require higher latency to switch to and wake up. - */ - static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { if (HAS_LLC(dev_priv)) { @@ -1137,10 +1118,7 @@ static u16 g4x_compute_wm(const struct intel_crtc_state *crtc_state, clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - if (plane->id == PLANE_CURSOR) - width = plane_state->base.crtc_w; - else - width = drm_rect_width(&plane_state->base.dst); + width = drm_rect_width(&plane_state->base.dst); if (plane->id == PLANE_CURSOR) { wm = intel_wm_method2(clock, htotal, width, cpp, latency); @@ -1327,8 +1305,8 @@ static int g4x_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->base.state); struct g4x_wm_state *wm_state = &crtc_state->wm.g4x.optimal; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); const struct g4x_pipe_wm *raw; const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; @@ -1490,7 +1468,7 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, struct g4x_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->cxsr = true; wm->hpll_en = true; @@ -1509,10 +1487,10 @@ static void g4x_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->fbc_en) wm->fbc_en = false; - num_active_crtcs++; + num_active_pipes++; } - if (num_active_crtcs != 1) { + if (num_active_pipes != 1) { wm->cxsr = false; wm->hpll_en = false; wm->fbc_en = false; @@ -1659,7 +1637,7 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); - int num_active_planes = hweight32(active_planes); + int num_active_planes = hweight8(active_planes); const int fifo_size = 511; int fifo_extra, 
fifo_left = fifo_size; int sprite0_fifo_extra = 0; @@ -1848,8 +1826,8 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; - int num_active_planes = hweight32(crtc_state->active_planes & - ~BIT(PLANE_CURSOR)); + int num_active_planes = hweight8(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); const struct intel_plane_state *old_plane_state; const struct intel_plane_state *new_plane_state; @@ -1909,7 +1887,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) for (level = 0; level < wm_state->num_levels; level++) { const struct g4x_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; + const int sr_fifo_size = INTEL_NUM_PIPES(dev_priv) * 512 - 1; if (!vlv_raw_crtc_wm_is_valid(crtc_state, level)) break; @@ -2098,7 +2076,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { struct intel_crtc *crtc; - int num_active_crtcs = 0; + int num_active_pipes = 0; wm->level = dev_priv->wm.max_level; wm->cxsr = true; @@ -2112,14 +2090,14 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, if (!wm_state->cxsr) wm->cxsr = false; - num_active_crtcs++; + num_active_pipes++; wm->level = min_t(int, wm->level, wm_state->num_levels - 1); } - if (num_active_crtcs != 1) + if (num_active_pipes != 1) wm->cxsr = false; - if (num_active_crtcs > 1) + if (num_active_pipes > 1) wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { @@ -2569,7 +2547,8 @@ static u32 ilk_compute_cur_wm(const struct intel_crtc_state *crtc_state, return ilk_wm_method2(crtc_state->pixel_rate, crtc_state->base.adjusted_mode.crtc_htotal, - plane_state->base.crtc_w, cpp, mem_value); + drm_rect_width(&plane_state->base.dst), + cpp, mem_value); } /* Only for WM_LP. 
*/ @@ -2648,7 +2627,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_i915_private *dev_priv, /* HSW allows LP1+ watermarks even with multiple pipes */ if (level == 0 || config->num_pipes_active > 1) { - fifo_size /= INTEL_INFO(dev_priv)->num_pipes; + fifo_size /= INTEL_NUM_PIPES(dev_priv); /* * For some reason the non self refresh @@ -3109,8 +3088,8 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) struct intel_pipe_wm *pipe_wm; struct drm_device *dev = state->dev; const struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane *plane; - const struct drm_plane_state *plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; const struct intel_plane_state *pristate = NULL; const struct intel_plane_state *sprstate = NULL; const struct intel_plane_state *curstate = NULL; @@ -3119,15 +3098,13 @@ static int ilk_compute_pipe_wm(struct intel_crtc_state *crtc_state) pipe_wm = &crtc_state->wm.ilk.optimal; - drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, &crtc_state->base) { - const struct intel_plane_state *ps = to_intel_plane_state(plane_state); - - if (plane->type == DRM_PLANE_TYPE_PRIMARY) - pristate = ps; - else if (plane->type == DRM_PLANE_TYPE_OVERLAY) - sprstate = ps; - else if (plane->type == DRM_PLANE_TYPE_CURSOR) - curstate = ps; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) + pristate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_OVERLAY) + sprstate = plane_state; + else if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + curstate = plane_state; } pipe_wm->pipe_enabled = crtc_state->base.active; @@ -3654,10 +3631,47 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { + /* HACK! */ + if (IS_GEN(dev_priv, 12)) + return false; + return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } +static void +skl_setup_sagv_block_time(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 12) { + u32 val = 0; + int ret; + + ret = sandybridge_pcode_read(dev_priv, + GEN12_PCODE_READ_SAGV_BLOCK_TIME_US, + &val, NULL); + if (!ret) { + dev_priv->sagv_block_time_us = val; + return; + } + + DRM_DEBUG_DRIVER("Couldn't read SAGV block time!\n"); + } else if (IS_GEN(dev_priv, 11)) { + dev_priv->sagv_block_time_us = 10; + return; + } else if (IS_GEN(dev_priv, 10)) { + dev_priv->sagv_block_time_us = 20; + return; + } else if (IS_GEN(dev_priv, 9)) { + dev_priv->sagv_block_time_us = 30; + return; + } else { + MISSING_CASE(INTEL_GEN(dev_priv)); + } + + /* Default to an unusable block time */ + dev_priv->sagv_block_time_us = -1; +} + /* * SAGV dynamically adjusts the system agent voltage and clock frequencies * depending on power and performance requirements. 
The display engine access @@ -3746,33 +3760,25 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) struct intel_crtc_state *crtc_state; enum pipe pipe; int level, latency; - int sagv_block_time_us; if (!intel_has_sagv(dev_priv)) return false; - if (IS_GEN(dev_priv, 9)) - sagv_block_time_us = 30; - else if (IS_GEN(dev_priv, 10)) - sagv_block_time_us = 20; - else - sagv_block_time_us = 10; - /* * If there are no active CRTCs, no additional checks need be performed */ - if (hweight32(state->active_crtcs) == 0) + if (hweight8(state->active_pipes) == 0) return true; /* * SKL+ workaround: bspec recommends we disable SAGV when we have * more then one pipe enabled */ - if (hweight32(state->active_crtcs) > 1) + if (hweight8(state->active_pipes) > 1) return false; /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(state->active_crtcs) - 1; + pipe = ffs(state->active_pipes) - 1; crtc = intel_get_crtc_for_pipe(dev_priv, pipe); crtc_state = to_intel_crtc_state(crtc->base.state); @@ -3804,7 +3810,7 @@ bool intel_can_enable_sagv(struct intel_atomic_state *state) * incur memory latencies higher than sagv_block_time_us we * can't enable SAGV. */ - if (latency < sagv_block_time_us) + if (latency < dev_priv->sagv_block_time_us) return false; } @@ -3867,14 +3873,14 @@ skl_ddb_get_pipe_allocation_limits(struct drm_i915_private *dev_priv, if (WARN_ON(!state) || !crtc_state->base.active) { alloc->start = 0; alloc->end = 0; - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight8(dev_priv->active_pipes); return; } if (intel_state->active_pipe_changes) - *num_active = hweight32(intel_state->active_crtcs); + *num_active = hweight8(intel_state->active_pipes); else - *num_active = hweight32(dev_priv->active_crtcs); + *num_active = hweight8(dev_priv->active_pipes); ddb_size = intel_get_ddb_size(dev_priv, crtc_state, total_data_rate, *num_active, ddb); @@ -4005,7 +4011,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); - if (is_planar_yuv_format(fourcc)) + if (fourcc && + drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc))) swap(val, val2); skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val); @@ -4063,7 +4070,6 @@ static uint_fixed_16_16_t skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); u32 src_w, src_h, dst_w, dst_h; uint_fixed_16_16_t fp_w_ratio, fp_h_ratio; uint_fixed_16_16_t downscale_h, downscale_w; @@ -4071,27 +4077,17 @@ skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state, if (WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state))) return u32_to_fixed16(0); - /* n.b., src is 16.16 fixed point, dst is whole integer */ - if (plane->id == PLANE_CURSOR) { - /* - * Cursors only support 0/180 degree rotation, - * hence no need to account for rotation here. - */ - src_w = plane_state->base.src_w >> 16; - src_h = plane_state->base.src_h >> 16; - dst_w = plane_state->base.crtc_w; - dst_h = plane_state->base.crtc_h; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. 
- */ - src_w = drm_rect_width(&plane_state->base.src) >> 16; - src_h = drm_rect_height(&plane_state->base.src) >> 16; - dst_w = drm_rect_width(&plane_state->base.dst); - dst_h = drm_rect_height(&plane_state->base.dst); - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + * + * n.b., src is 16.16 fixed point, dst is whole integer. + */ + src_w = drm_rect_width(&plane_state->base.src) >> 16; + src_h = drm_rect_height(&plane_state->base.src) >> 16; + dst_w = drm_rect_width(&plane_state->base.dst); + dst_h = drm_rect_height(&plane_state->base.dst); fp_w_ratio = div_fixed16(src_w, dst_w); fp_h_ratio = div_fixed16(src_h, dst_h); @@ -4139,8 +4135,8 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, { struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + const struct intel_plane_state *plane_state; + struct intel_plane *plane; int crtc_clock, dotclk; u32 pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; @@ -4149,12 +4145,10 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, if (!crtc_state->base.enable) return 0; - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { uint_fixed_16_16_t plane_downscale; uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); int bpp; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); if (!intel_wm_plane_visible(crtc_state, plane_state)) continue; @@ -4193,25 +4187,23 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, static u64 skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, - const int plane) + int color_plane) { - struct intel_plane *intel_plane = to_intel_plane(plane_state->base.plane); + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + const struct drm_framebuffer *fb = plane_state->base.fb; u32 data_rate; u32 width = 0, height = 0; - struct drm_framebuffer *fb; - u32 format; uint_fixed_16_16_t down_scale_amount; u64 rate; if (!plane_state->base.visible) return 0; - fb = plane_state->base.fb; - format = fb->format->format; - - if (intel_plane->id == PLANE_CURSOR) + if (plane->id == PLANE_CURSOR) return 0; - if (plane == 1 && !is_planar_yuv_format(format)) + + if (color_plane == 1 && + !drm_format_info_is_yuv_semiplanar(fb->format)) return 0; /* @@ -4223,7 +4215,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, height = drm_rect_height(&plane_state->base.src) >> 16; /* UV plane does 1/2 pixel sub-sampling */ - if (plane == 1 && is_planar_yuv_format(format)) { + if (color_plane == 1) { width /= 2; height /= 2; } @@ -4234,7 +4226,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state, rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount); - rate *= fb->format->cpp[plane]; + rate *= fb->format->cpp[color_plane]; return rate; } @@ -4244,18 +4236,16 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *uv_plane_data_rate) { struct drm_atomic_state *state = crtc_state->base.state; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state 
*plane_state; u64 total_data_rate = 0; if (WARN_ON(!state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - enum plane_id plane_id = to_intel_plane(plane)->id; - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; /* packed/y */ @@ -4276,21 +4266,19 @@ static u64 icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, u64 *plane_data_rate) { - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; u64 total_data_rate = 0; if (WARN_ON(!crtc_state->base.state)) return 0; /* Calculate and cache data rate for each plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); - enum plane_id plane_id = to_intel_plane(plane)->id; + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { + enum plane_id plane_id = plane->id; u64 rate; - if (!plane_state->linked_plane) { + if (!plane_state->planar_linked_plane) { rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; @@ -4299,17 +4287,17 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, /* * The slave plane might not iterate in - * drm_atomic_crtc_state_for_each_plane_state(), + * intel_atomic_crtc_state_for_each_plane_state(), * and needs the master plane state which may be * NULL if we try get_new_plane_state(), so we * always calculate from the master. */ - if (plane_state->slave) + if (plane_state->planar_slave) continue; /* Y plane rate is calculated on the slave */ rate = skl_plane_relative_data_rate(crtc_state, plane_state, 0); - y_plane_id = plane_state->linked_plane->id; + y_plane_id = plane_state->planar_linked_plane->id; plane_data_rate[y_plane_id] = rate; total_data_rate += rate; @@ -4639,7 +4627,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, u32 interm_pbpl; /* only planar format has two planes */ - if (color_plane == 1 && !is_planar_yuv_format(format->format)) { + if (color_plane == 1 && !drm_format_info_is_yuv_semiplanar(format)) { DRM_DEBUG_KMS("Non planar format have single plane\n"); return -EINVAL; } @@ -4651,7 +4639,7 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->x_tiled = modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = modifier == I915_FORMAT_MOD_Y_TILED_CCS || modifier == I915_FORMAT_MOD_Yf_TILED_CCS; - wp->is_planar = is_planar_yuv_format(format->format); + wp->is_planar = drm_format_info_is_yuv_semiplanar(format); wp->width = width; if (color_plane == 1 && wp->is_planar) @@ -4723,20 +4711,15 @@ skl_compute_plane_wm_params(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, struct skl_wm_params *wp, int color_plane) { - struct intel_plane *plane = to_intel_plane(plane_state->base.plane); const struct drm_framebuffer *fb = plane_state->base.fb; int width; - if (plane->id == PLANE_CURSOR) { - width = plane_state->base.crtc_w; - } else { - /* - * Src coordinates are already rotated by 270 degrees for - * the 90/270 degree plane rotation cases (to match the - * GTT mapping), hence no need to account for rotation here. 
- */ - width = drm_rect_width(&plane_state->base.src) >> 16; - } + /* + * Src coordinates are already rotated by 270 degrees for + * the 90/270 degree plane rotation cases (to match the + * GTT mapping), hence no need to account for rotation here. + */ + width = drm_rect_width(&plane_state->base.src) >> 16; return skl_compute_wm_params(crtc_state, width, fb->format, fb->modifier, @@ -5048,12 +5031,12 @@ static int icl_build_plane_wm(struct intel_crtc_state *crtc_state, int ret; /* Watermarks calculated in master */ - if (plane_state->slave) + if (plane_state->planar_slave) return 0; - if (plane_state->linked_plane) { + if (plane_state->planar_linked_plane) { const struct drm_framebuffer *fb = plane_state->base.fb; - enum plane_id y_plane_id = plane_state->linked_plane->id; + enum plane_id y_plane_id = plane_state->planar_linked_plane->id; WARN_ON(!intel_wm_plane_visible(crtc_state, plane_state)); WARN_ON(!fb->format->is_yuv || @@ -5082,8 +5065,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; - struct drm_plane *plane; - const struct drm_plane_state *drm_plane_state; + struct intel_plane *plane; + const struct intel_plane_state *plane_state; int ret; /* @@ -5092,10 +5075,8 @@ static int skl_build_pipe_wm(struct intel_crtc_state *crtc_state) */ memset(pipe_wm->planes, 0, sizeof(pipe_wm->planes)); - drm_atomic_crtc_state_for_each_plane_state(plane, drm_plane_state, - &crtc_state->base) { - const struct intel_plane_state *plane_state = - to_intel_plane_state(drm_plane_state); + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, + crtc_state) { if (INTEL_GEN(dev_priv) >= 11) ret = icl_build_plane_wm(crtc_state, plane_state); @@ -5255,19 +5236,6 @@ bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry *ddb, return false; } -static u32 -pipes_modified(struct intel_atomic_state *state) -{ - struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - u32 i, ret = 0; - - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) - ret |= drm_crtc_mask(&crtc->base); - - return ret; -} - static int skl_ddb_add_affected_planes(const struct intel_crtc_state *old_crtc_state, struct intel_crtc_state *new_crtc_state) @@ -5443,36 +5411,27 @@ skl_print_wm_changes(struct intel_atomic_state *state) } } -static int -skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) +static int intel_add_all_pipes(struct intel_atomic_state *state) { - struct drm_device *dev = state->base.dev; - const struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc *crtc; - struct intel_crtc_state *crtc_state; - u32 realloc_pipes = pipes_modified(state); - int ret, i; - /* - * When we distrust bios wm we always need to recompute to set the - * expected DDB allocations for each CRTC. - */ - if (dev_priv->wm.distrust_bios_wm) - (*changed) = true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state; - /* - * If this transaction isn't actually touching any CRTC's, don't - * bother with watermark calculation. Note that if we pass this - * test, we're guaranteed to hold at least one CRTC state mutex, - * which means we can safely use values like dev_priv->active_crtcs - * since any racing commits that want to update them would need to - * hold _all_ CRTC state mutexes. 
- */ - for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) - (*changed) = true; + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } - if (!*changed) - return 0; + return 0; +} + +static int +skl_ddb_add_affected_pipes(struct intel_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + int ret; /* * If this is our first atomic update following hardware readout, @@ -5481,7 +5440,7 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) * ensure a full DDB recompute. */ if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex, state->base.acquire_ctx); if (ret) return ret; @@ -5489,13 +5448,13 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) state->active_pipe_changes = ~0; /* - * We usually only initialize state->active_crtcs if we + * We usually only initialize state->active_pipes if we * we're doing a modeset; make sure this field is always * initialized during the sanitization process that happens * on the first commit too. */ if (!state->modeset) - state->active_crtcs = dev_priv->active_crtcs; + state->active_pipes = dev_priv->active_pipes; } /* @@ -5512,18 +5471,11 @@ skl_ddb_add_affected_pipes(struct intel_atomic_state *state, bool *changed) * to grab the lock on *all* CRTC's. */ if (state->active_pipe_changes || state->modeset) { - realloc_pipes = ~0; state->wm_results.dirty_pipes = ~0; - } - /* - * We're not recomputing for the pipes not included in the commit, so - * make sure we start with the current state. - */ - for_each_intel_crtc_mask(dev, crtc, realloc_pipes) { - crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); - if (IS_ERR(crtc_state)) - return PTR_ERR(crtc_state); + ret = intel_add_all_pipes(state); + if (ret) + return ret; } return 0; @@ -5596,14 +5548,13 @@ skl_compute_wm(struct intel_atomic_state *state) struct intel_crtc_state *new_crtc_state; struct intel_crtc_state *old_crtc_state; struct skl_ddb_values *results = &state->wm_results; - bool changed = false; int ret, i; /* Clear all dirty flags */ results->dirty_pipes = 0; - ret = skl_ddb_add_affected_pipes(state, &changed); - if (ret || !changed) + ret = skl_ddb_add_affected_pipes(state); + if (ret) return ret; /* @@ -5625,7 +5576,7 @@ skl_compute_wm(struct intel_atomic_state *state) if (!skl_pipe_wm_equals(crtc, &old_crtc_state->wm.skl.optimal, &new_crtc_state->wm.skl.optimal)) - results->dirty_pipes |= drm_crtc_mask(&crtc->base); + results->dirty_pipes |= BIT(crtc->pipe); } ret = skl_compute_ddb(state); @@ -5645,7 +5596,7 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; enum pipe pipe = crtc->pipe; - if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) + if ((state->wm_results.dirty_pipes & BIT(crtc->pipe)) == 0) return; I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); @@ -5654,12 +5605,11 @@ static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, static void skl_initial_wm(struct intel_atomic_state *state, struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private 
*dev_priv = to_i915(crtc->base.dev); struct skl_ddb_values *results = &state->wm_results; - if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) + if ((results->dirty_pipes & BIT(crtc->pipe)) == 0) return; mutex_lock(&dev_priv->wm.wm_mutex); @@ -5808,10 +5758,10 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); if (crtc->active) - hw->dirty_pipes |= drm_crtc_mask(&crtc->base); + hw->dirty_pipes |= BIT(crtc->pipe); } - if (dev_priv->active_crtcs) { + if (dev_priv->active_pipes) { /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } @@ -6915,142 +6865,27 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) return err; } -static void gen9_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN9_PG_ENABLE, 0); -} - static void gen9_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void gen6_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) -{ - I915_WRITE(GEN6_RC_CONTROL, 0); -} - static void cherryview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) -{ - /* We're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - I915_WRITE(GEN6_RC_CONTROL, 0); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_disable_rps(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RP_CONTROL, 0); } -static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) -{ - bool enable_rc6 = true; - unsigned long rc6_ctx_base; - u32 rc_ctl; - int rc_sw_target; - - rc_ctl = I915_READ(GEN6_RC_CONTROL); - rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >> - RC_SW_TARGET_STATE_SHIFT; - DRM_DEBUG_DRIVER("BIOS enabled RC states: " - "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n", - onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE), - onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE), - rc_sw_target); - - if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) { - DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n"); - enable_rc6 = false; - } - - /* - * The exact context size is not known for BXT, so assume a page size - * for this check. 
- */ - rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && - (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { - DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); - enable_rc6 = false; - } - - if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) && - ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) { - DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_PUSHBUS_CONTROL) || - !I915_READ(GEN8_PUSHBUS_ENABLE) || - !I915_READ(GEN8_PUSHBUS_SHIFT)) { - DRM_DEBUG_DRIVER("Pushbus not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN6_GFXPAUSE)) { - DRM_DEBUG_DRIVER("GFX pause not setup properly.\n"); - enable_rc6 = false; - } - - if (!I915_READ(GEN8_MISC_CTRL0)) { - DRM_DEBUG_DRIVER("GPM control not setup properly.\n"); - enable_rc6 = false; - } - - return enable_rc6; -} - -static bool sanitize_rc6(struct drm_i915_private *i915) -{ - struct intel_device_info *info = mkwrite_device_info(i915); - - /* Powersaving is controlled by the host when inside a VM */ - if (intel_vgpu_active(i915)) { - info->has_rc6 = 0; - info->has_rps = false; - } - - if (info->has_rc6 && - IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) { - DRM_INFO("RC6 disabled by BIOS\n"); - info->has_rc6 = 0; - } - - /* - * We assume that we do not have any deep rc6 levels if we don't have - * have the previous rc6 level supported, i.e. we use HAS_RC6() - * as the initial coarse check for rc6 in general, moving on to - * progressively finer/deeper levels. - */ - if (!info->has_rc6 && info->has_rc6p) - info->has_rc6p = 0; - - return info->has_rc6; -} - static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7137,203 +6972,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen11_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* - * 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. 
If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - GEN6_RC_CTL_EI_MODE(1)); - - /* 3b: Enable Coarse Power Gating only when RC6 is enabled. */ - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen9_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6_mode; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - if (INTEL_GEN(dev_priv) >= 10) { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85); - I915_WRITE(GEN10_MEDIA_WAKE_RATE_LIMIT, 150); - } else if (IS_SKYLAKE(dev_priv)) { - /* - * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only - * when CPG is enabled - */ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16); - } else { - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); - } - - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - if (HAS_GT_UC(dev_priv)) - I915_WRITE(GUC_MAX_IDLE_COUNT, 0xA); - - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* - * 2c: Program Coarse Power Gating Policies. - * - * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we - * use instead is a more conservative estimate for the maximum time - * it takes us to service a CS interrupt and submit a new ELSP - that - * is the time which the GPU is idle waiting for the CPU to select the - * next request to execute. If the idle hysteresis is less than that - * interrupt service latency, the hardware will automatically gate - * the power well and we will then incur the wake up cost on top of - * the service latency. A similar guide from plane_state is that we - * do not want the enable hysteresis to less than the wakeup latency. - * - * igt/gem_exec_nop/sequential provides a rough estimate for the - * service latency, and puts it around 10us for Broadwell (and other - * big core) and around 40us for Broxton (and other low power cores). - * [Note that for legacy ringbuffer submission, this is less than 1us!] - * However, the wakeup latency on Broxton is closer to 100us. 
To be - * conservative, we have to factor in a context switch on top (due - * to ksoftirqd). - */ - I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250); - I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 250); - - /* 3a: Enable RC6 */ - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_RC6_ENABLE | - rc6_mode); - - /* - * 3b: Enable Coarse Power Gating only when RC6 is enabled. - * WaRsDisableCoarsePowerGating:skl,cnl - Render/Media PG need to be disabled with RC6. - */ - if (NEEDS_WaRsDisableCoarsePowerGating(dev_priv)) - I915_WRITE(GEN9_PG_ENABLE, 0); - else - I915_WRITE(GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - -static void gen8_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); - - /* 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - - /* 3: Enable RC6 */ - - I915_WRITE(GEN6_RC_CONTROL, - GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - GEN6_RC_CTL_RC6_ENABLE); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen8_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -7374,75 +7012,6 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen6_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 rc6vids, rc6_mask; - u32 gtfifodbg; - int ret; - - I915_WRITE(GEN6_RC_STATE, 0); - - /* Clear the DBG now so we don't confuse earlier errors */ - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev_priv)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - - /* 
We don't use those on Haswell */ - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - if (HAS_RC6p(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - if (HAS_RC6pp(dev_priv)) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); - - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, - &rc6vids, NULL); - if (IS_GEN(dev_priv, 6) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void gen6_enable_rps(struct drm_i915_private *dev_priv) { /* Here begins a magic sequence of register writes to enable @@ -7462,93 +7031,6 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) -{ - struct intel_rps *rps = &dev_priv->gt_pm.rps; - const int min_freq = 15; - const int scaling_factor = 180; - unsigned int gpu_freq; - unsigned int max_ia_freq, min_ring_freq; - unsigned int max_gpu_freq, min_gpu_freq; - struct cpufreq_policy *policy; - - lockdep_assert_held(&rps->lock); - - if (rps->max_freq <= rps->min_freq) - return; - - policy = cpufreq_cpu_get(0); - if (policy) { - max_ia_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } else { - /* - * Default to measured freq if none found, PCU will ensure we - * don't go over - */ - max_ia_freq = tsc_khz; - } - - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; - - min_ring_freq = I915_READ(DCLK) & 0xf; - /* convert DDR frequency from units of 266.6MHz to bandwidth */ - min_ring_freq = mult_frac(min_ring_freq, 8, 3); - - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* Convert GT frequency to 50 HZ units */ - min_gpu_freq /= GEN9_FREQ_SCALER; - max_gpu_freq /= GEN9_FREQ_SCALER; - } - - /* - * For each potential GPU frequency, load a ring frequency we'd like - * to use for memory access. We do this by specifying the IA frequency - * the PCU should use as a reference to determine the ring frequency. - */ - for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { - const int diff = max_gpu_freq - gpu_freq; - unsigned int ia_freq = 0, ring_freq = 0; - - if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { - /* - * ring_freq = 2 * GT. ring_freq is in 100MHz units - * No floor required for ring frequency on SKL. - */ - ring_freq = gpu_freq; - } else if (INTEL_GEN(dev_priv) >= 8) { - /* max(2 * GT, DDR). NB: GT is 50MHz units */ - ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev_priv)) { - ring_freq = mult_frac(gpu_freq, 5, 4); - ring_freq = max(min_ring_freq, ring_freq); - /* leave ia_freq as the default, chosen by cpufreq */ - } else { - /* On older processors, there is no separate ring - * clock domain, so in order to boost the bandwidth - * of the ring, we need to upclock the CPU (ia_freq). - * - * For GPU frequencies less than 750MHz, - * just use the lowest ring freq. 
- */ - if (gpu_freq < min_freq) - ia_freq = 800; - else - ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); - } - - sandybridge_pcode_write(dev_priv, - GEN6_PCODE_WRITE_MIN_FREQ_TABLE, - ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | - ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | - gpu_freq); - } -} - static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { u32 val, rp0; @@ -7659,100 +7141,6 @@ static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) return max_t(u32, val, 0xc0); } -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON(pctx_addr != dev_priv->dsm.start + - dev_priv->vlv_pctx->stolen->start); -} - - -/* Check that the pcbr address is not empty. */ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} - -static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) -{ - resource_size_t pctx_paddr, paddr; - resource_size_t pctx_size = 32*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = dev_priv->dsm.end + 1 - pctx_size; - GEM_BUG_ON(paddr > U32_MAX); - - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } - - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); -} - -static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - resource_size_t pctx_paddr; - resource_size_t pctx_size = 24*1024; - u32 pcbr; - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - resource_size_t pcbr_offset; - - pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; - } - - DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - - /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. 
- */ - pctx = i915_gem_object_create_stolen(dev_priv, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - goto out; - } - - GEM_BUG_ON(range_overflows_t(u64, - dev_priv->dsm.start, - pctx->stolen->start, - U32_MAX)); - pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); - -out: - DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); - dev_priv->vlv_pctx = pctx; -} - -static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) -{ - struct drm_i915_gem_object *pctx; - - pctx = fetch_and_zero(&dev_priv->vlv_pctx); - if (pctx) - i915_gem_object_put(pctx); -} - static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.gpll_ref_freq = @@ -7769,8 +7157,6 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - valleyview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7825,8 +7211,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) struct intel_rps *rps = &dev_priv->gt_pm.rps; u32 val; - cherryview_setup_pctx(dev_priv); - vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_PUNIT) | BIT(VLV_IOSF_SB_NC) | @@ -7877,64 +7261,6 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) "Odd GPU freq values\n"); } -static void valleyview_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - valleyview_cleanup_pctx(dev_priv); -} - -static void cherryview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg, rc6_mode, pcbr; - - gtfifodbg = I915_READ(GTFIFODBG) & ~(GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV | - GT_FIFO_FREE_ENTRIES_CHV); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - cherryview_check_pctx(dev_priv); - - /* 1a & 1b: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. 
*/ - I915_WRITE(GEN6_RC_CONTROL, 0); - - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - - /* TO threshold set to 500 us ( 0x186 * 1.28 us) */ - I915_WRITE(GEN6_RC6_THRESHOLD, 0x186); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); - - /* 3: Enable RC6 */ - rc6_mode = 0; - if (pcbr >> VLV_PCBR_ADDR_SHIFT) - rc6_mode = GEN7_RC_CTL_TO_MODE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void cherryview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -7979,49 +7305,6 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); } -static void valleyview_enable_rc6(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - u32 gtfifodbg; - - valleyview_check_pctx(dev_priv); - - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); - } - - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - /* Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); - - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); - - for_each_engine(engine, dev_priv, id) - I915_WRITE(RING_MAX_IDLE(engine->mmio_base), 10); - - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* Allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - I915_WRITE(GEN6_RC_CONTROL, - GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL); - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); -} - static void valleyview_enable_rps(struct drm_i915_private *dev_priv) { u32 val; @@ -8548,14 +7831,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. 
- */ - if (!sanitize_rc6(dev_priv)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - pm_runtime_get(&dev_priv->drm.pdev->dev); - } + /* Powersaving is controlled by the host when inside a VM */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->has_rps = false; /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) @@ -8590,19 +7868,9 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) rps->cur_freq = rps->idle_freq; } -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv)) - valleyview_cleanup_gt_powersave(dev_priv); - - if (!HAS_RC6(dev_priv)) - pm_runtime_put(&dev_priv->drm.pdev->dev); -} - void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) { dev_priv->gt_pm.rps.enabled = true; /* force RPS disabling */ - dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); if (INTEL_GEN(dev_priv) >= 11) @@ -8611,37 +7879,6 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } -static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (!i915->gt_pm.llc_pstate.enabled) - return; - - /* Currently there is no HW configuration to be done to disable. */ - - i915->gt_pm.llc_pstate.enabled = false; -} - -static void intel_disable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (!dev_priv->gt_pm.rc6.enabled) - return; - - if (INTEL_GEN(dev_priv) >= 9) - gen9_disable_rc6(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) - cherryview_disable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_disable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_disable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = false; -} - static void intel_disable_rps(struct drm_i915_private *dev_priv) { lockdep_assert_held(&dev_priv->gt_pm.rps.lock); @@ -8667,49 +7904,13 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->gt_pm.rps.lock); - intel_disable_rc6(dev_priv); intel_disable_rps(dev_priv); if (HAS_LLC(dev_priv)) - intel_disable_llc_pstate(dev_priv); + intel_llc_disable(&dev_priv->gt.llc); mutex_unlock(&dev_priv->gt_pm.rps.lock); } -static inline void intel_enable_llc_pstate(struct drm_i915_private *i915) -{ - lockdep_assert_held(&i915->gt_pm.rps.lock); - - if (i915->gt_pm.llc_pstate.enabled) - return; - - gen6_update_ring_freq(i915); - - i915->gt_pm.llc_pstate.enabled = true; -} - -static void intel_enable_rc6(struct drm_i915_private *dev_priv) -{ - lockdep_assert_held(&dev_priv->gt_pm.rps.lock); - - if (dev_priv->gt_pm.rc6.enabled) - return; - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_enable_rc6(dev_priv); - else if (IS_VALLEYVIEW(dev_priv)) - valleyview_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) - gen11_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 9) - gen9_enable_rc6(dev_priv); - else if (IS_BROADWELL(dev_priv)) - gen8_enable_rc6(dev_priv); - else if (INTEL_GEN(dev_priv) >= 6) - gen6_enable_rc6(dev_priv); - - dev_priv->gt_pm.rc6.enabled = true; -} - static void intel_enable_rps(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; @@ -8751,12 +7952,10 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->gt_pm.rps.lock); - if (HAS_RC6(dev_priv)) - intel_enable_rc6(dev_priv); if (HAS_RPS(dev_priv)) 
intel_enable_rps(dev_priv); - if (HAS_LLC(dev_priv)) - intel_enable_llc_pstate(dev_priv); + + intel_llc_enable(&dev_priv->gt.llc); mutex_unlock(&dev_priv->gt_pm.rps.lock); } @@ -8858,7 +8057,7 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv) static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { - int pipe; + enum pipe pipe; u32 val; /* @@ -9078,6 +8277,22 @@ static void icl_init_clock_gating(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(GEN11_ENABLE_32_PLANE_MODE)); } +static void tgl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + u32 vd_pg_enable = 0; + unsigned int i; + + /* This is not a WA. Enable VD HCP & MFX_ENC powergate */ + for (i = 0; i < I915_MAX_VCS; i++) { + if (HAS_ENGINE(dev_priv, _VCS(i))) + vd_pg_enable |= VDN_HCP_POWERGATE_ENABLE(i) | + VDN_MFX_POWERGATE_ENABLE(i); + } + + I915_WRITE(POWERGATE_ENABLE, + I915_READ(POWERGATE_ENABLE) | vd_pg_enable); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9598,7 +8813,7 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { if (IS_GEN(dev_priv, 12)) - dev_priv->display.init_clock_gating = nop_init_clock_gating; + dev_priv->display.init_clock_gating = tgl_init_clock_gating; else if (IS_GEN(dev_priv, 11)) dev_priv->display.init_clock_gating = icl_init_clock_gating; else if (IS_CANNONLAKE(dev_priv)) @@ -9654,6 +8869,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) else if (IS_GEN(dev_priv, 5)) i915_ironlake_get_mem_freq(dev_priv); + if (intel_has_sagv(dev_priv)) + skl_setup_sagv_block_time(dev_priv); + /* For FIFO watermark updates */ if (INTEL_GEN(dev_priv) >= 9) { skl_setup_wm_latency(dev_priv); @@ -9712,7 +8930,7 @@ void intel_init_pm(struct drm_i915_private *dev_priv) dev_priv->display.update_wm = i9xx_update_wm; dev_priv->display.get_fifo_size = i9xx_get_fifo_size; } else if (IS_GEN(dev_priv, 2)) { - if (INTEL_INFO(dev_priv)->num_pipes == 1) { + if (INTEL_NUM_PIPES(dev_priv) == 1) { dev_priv->display.update_wm = i845_update_wm; dev_priv->display.get_fifo_size = i845_get_fifo_size; } else { @@ -9798,133 +9016,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv) atomic_set(&dev_priv->runtime_pm.wakeref_count, 0); } -static u64 vlv_residency_raw(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - u32 lower, upper, tmp; - int loop = 2; - - /* - * The register accessed do not need forcewake. We borrow - * uncore lock to prevent concurrent access to range reg. - */ - lockdep_assert_held(&dev_priv->uncore.lock); - - /* - * vlv and chv residency counters are 40 bits in width. - * With a control bit, we can choose between upper or lower - * 32bit window into this counter. - * - * Although we always use the counter in high-range mode elsewhere, - * userspace may attempt to read the value before rc6 is initialised, - * before we have set the default VLV_COUNTER_CONTROL value. So always - * set the high bit to be safe. 
- */ - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - do { - tmp = upper; - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH)); - lower = I915_READ_FW(reg); - - I915_WRITE_FW(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH)); - upper = I915_READ_FW(reg); - } while (upper != tmp && --loop); - - /* - * Everywhere else we always use VLV_COUNTER_CONTROL with the - * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set - * now. - */ - - return lower | (u64)upper << 8; -} - -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u64 time_hw, prev_hw, overflow_hw; - unsigned int fw_domains; - unsigned long flags; - unsigned int i; - u32 mul, div; - - if (!HAS_RC6(dev_priv)) - return 0; - - /* - * Store previous hw counter values for counter wrap-around handling. - * - * There are only four interesting registers and they live next to each - * other so we can use the relative address, compared to the smallest - * one as the index into driver storage. - */ - i = (i915_mmio_reg_offset(reg) - - i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32); - if (WARN_ON_ONCE(i >= ARRAY_SIZE(dev_priv->gt_pm.rc6.cur_residency))) - return 0; - - fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ); - - spin_lock_irqsave(&uncore->lock, flags); - intel_uncore_forcewake_get__locked(uncore, fw_domains); - - /* On VLV and CHV, residency time is in CZ units rather than 1.28us */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mul = 1000000; - div = dev_priv->czclk_freq; - overflow_hw = BIT_ULL(40); - time_hw = vlv_residency_raw(dev_priv, reg); - } else { - /* 833.33ns units on Gen9LP, 1.28us elsewhere. */ - if (IS_GEN9_LP(dev_priv)) { - mul = 10000; - div = 12; - } else { - mul = 1280; - div = 1; - } - - overflow_hw = BIT_ULL(32); - time_hw = intel_uncore_read_fw(uncore, reg); - } - - /* - * Counter wrap handling. - * - * But relying on a sufficient frequency of queries otherwise counters - * can still wrap. - */ - prev_hw = dev_priv->gt_pm.rc6.prev_hw_residency[i]; - dev_priv->gt_pm.rc6.prev_hw_residency[i] = time_hw; - - /* RC6 delta from last sample. */ - if (time_hw >= prev_hw) - time_hw -= prev_hw; - else - time_hw += overflow_hw - prev_hw; - - /* Add delta to RC6 extended raw driver copy. 
*/ - time_hw += dev_priv->gt_pm.rc6.cur_residency[i]; - dev_priv->gt_pm.rc6.cur_residency[i] = time_hw; - - intel_uncore_forcewake_put__locked(uncore, fw_domains); - spin_unlock_irqrestore(&uncore->lock, flags); - - return mul_u64_u32_div(time_hw, mul, div); -} - -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, - i915_reg_t reg) -{ - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); -} - u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat) { u32 cagf; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index e3573e1e16e3..93d192d0610a 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -32,7 +32,6 @@ void intel_pm_setup(struct drm_i915_private *dev_priv); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); void intel_gpu_ips_teardown(void); void intel_init_gt_powersave(struct drm_i915_private *dev_priv); -void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv); void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv); void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); @@ -72,8 +71,6 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv); int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg); -u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, i915_reg_t reg); u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 2fd3c097e1f5..ad719c9602af 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -27,7 +27,6 @@ */ #include <linux/pm_runtime.h> -#include <linux/vgaarb.h> #include <drm/drm_print.h> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9e583f13a9e4..94a97bf8c021 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -805,9 +805,6 @@ void assert_forcewakes_active(struct intel_uncore *uncore, /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000) -#define GEN11_NEEDS_FORCE_WAKE(reg) \ - ((reg) < 0x40000 || ((reg) >= 0x1c0000 && (reg) < 0x1dc000)) - #define __gen6_reg_read_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd; \ @@ -903,12 +900,10 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { }) #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset))) \ - __fwd = find_fw_domain(uncore, offset); \ - __fwd; \ -}) + find_fw_domain(uncore, offset) + +#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ + find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). 
*/ static const i915_reg_t gen8_shadowed_regs[] = { @@ -935,6 +930,20 @@ static const i915_reg_t gen11_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; +static const i915_reg_t gen12_shadowed_regs[] = { + RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ + GEN6_RPNSWREQ, /* 0xA008 */ + GEN6_RC_VIDEO_FREQ, /* 0xA00C */ + RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ + RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ + RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ + RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ + RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ + RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ + RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ + /* TODO: Other registers are not yet used */ +}; + static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -957,6 +966,7 @@ static bool is_gen##x##_shadowed(u32 offset) \ __is_genX_shadowed(8) __is_genX_shadowed(11) +__is_genX_shadowed(12) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1005,8 +1015,18 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { #define __gen11_fwtable_reg_write_fw_domains(uncore, offset) \ ({ \ enum forcewake_domains __fwd = 0; \ - if (GEN11_NEEDS_FORCE_WAKE((offset)) && !is_gen11_shadowed(offset)) \ - __fwd = find_fw_domain(uncore, offset); \ + const u32 __offset = (offset); \ + if (!is_gen11_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ + __fwd; \ +}) + +#define __gen12_fwtable_reg_write_fw_domains(uncore, offset) \ +({ \ + enum forcewake_domains __fwd = 0; \ + const u32 __offset = (offset); \ + if (!is_gen12_shadowed(__offset)) \ + __fwd = find_fw_domain(uncore, __offset); \ __fwd; \ }) @@ -1065,9 +1085,51 @@ static const struct intel_forcewake_range __gen11_fw_ranges[] = { GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xb480, 0xdeff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xdf00, 0xe8ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0xe900, 0x16dff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x16e00, 0x19fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a000, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x40000, 0x1bffff, 0), + GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), + GEN_FW_RANGE(0x1c4000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX1), + GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), + GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1d0000, 0x1d3fff, FORCEWAKE_MEDIA_VDBOX2), + GEN_FW_RANGE(0x1d4000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX3), + GEN_FW_RANGE(0x1d8000, 0x1dbfff, FORCEWAKE_MEDIA_VEBOX1) +}; + +/* *Must* be sorted by offset ranges! See intel_fw_table_check(). 
*/ +static const struct intel_forcewake_range __gen12_fw_ranges[] = { + GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb00, 0x1fff, 0), /* uncore range */ + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8140, 0x815f, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8160, 0x82ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8300, 0x84ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8500, 0x8bff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x8c00, 0x8cff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x8d00, 0x93ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_ALL), + GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), GEN_FW_RANGE(0xb480, 0xdfff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xe900, 0x243ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xe900, 0x147ff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x14800, 0x148ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x14900, 0x19fff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1a000, 0x1a7ff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1a800, 0x1afff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0x1b000, 0x1bfff, FORCEWAKE_RENDER), + GEN_FW_RANGE(0x1c000, 0x243ff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x24400, 0x247ff, FORCEWAKE_RENDER), GEN_FW_RANGE(0x24800, 0x3ffff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0x40000, 0x1bffff, 0), @@ -1228,6 +1290,7 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) +__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -1319,6 +1382,7 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) +__gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); __gen_reg_write_funcs(gen8); @@ -1690,10 +1754,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) ASSIGN_FW_DOMAINS_TABLE(uncore, __gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable); - } else { + } else if (IS_GEN(i915, 11)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); + } else { + ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); } uncore->pmic_bus_access_nb.notifier_call = i915_pmic_bus_access_notifier; diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 414fc2cb0459..dcfa243892c6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -378,23 +378,23 @@ intel_uncore_read64_2x32(struct intel_uncore *uncore, static inline void intel_uncore_rmw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; - val = intel_uncore_read(uncore, reg); - val &= ~clear; - val |= set; - intel_uncore_write(uncore, reg, val); + old = intel_uncore_read(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write(uncore, reg, val); } static inline void intel_uncore_rmw_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clear, u32 set) { - u32 val; + u32 old, val; - val = intel_uncore_read_fw(uncore, reg); - val &= ~clear; - val |= set; - 
intel_uncore_write_fw(uncore, reg, val); + old = intel_uncore_read_fw(uncore, reg); + val = (old & ~clear) | set; + if (val != old) + intel_uncore_write_fw(uncore, reg, val); } static inline int intel_uncore_write_and_verify(struct intel_uncore *uncore, diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 77d844ac8b71..268192b5613b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915) return NULL; kref_init(&active->ref); - i915_active_init(i915, &active->base, __live_active, __live_retire); + i915_active_init(&active->base, __live_active, __live_retire); return active; } @@ -110,7 +110,7 @@ __live_active_setup(struct drm_i915_private *i915) submit, GFP_KERNEL); if (err >= 0) - err = i915_active_ref(&active->base, rq->timeline, rq); + err = i915_active_add_request(&active->base, rq); i915_request_add(rq); if (err) { pr_err("Failed to track active ref!\n"); @@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915) } i915_active_release(&active->base); - if (active->retired && count) { + if (READ_ONCE(active->retired) && count) { pr_err("i915_active retired before submission!\n"); err = -EINVAL; } @@ -146,35 +146,25 @@ static int live_active_wait(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests retire upon waiting */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); i915_active_wait(&active->base); - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after waiting!\n"); err = -EINVAL; } __live_put(active); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } @@ -182,35 +172,25 @@ static int live_active_retire(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests are indirectly retired */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); /* waits for & retires all requests */ - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); err = -EINVAL; } __live_put(active); -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_buddy.c b/drivers/gpu/drm/i915/selftests/i915_buddy.c index 23f784eae1e7..1b856bae67b5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_buddy.c +++ b/drivers/gpu/drm/i915/selftests/i915_buddy.c @@ -375,6 +375,8 @@ retry: if (err) break; + + cond_resched(); } if (err == -ENOMEM) @@ -687,6 +689,8 @@ static int igt_buddy_alloc_range(void *arg) rem -= size; if (!rem) break; + + cond_resched(); } if (err == -ENOMEM) diff --git 
a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index bb6dd54a6ff3..97f89f744ee2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -118,6 +118,10 @@ static void pm_resume(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { intel_gt_sanitize(&i915->gt, false); i915_gem_sanitize(i915); + + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(&i915->ggtt); + i915_gem_resume(i915); } } @@ -134,11 +138,9 @@ static int igt_gem_suspend(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -153,9 +155,7 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; @@ -173,11 +173,9 @@ static int igt_gem_hibernate(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -192,9 +190,7 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index cb30c669b1b7..0af9a58d011d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { - struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - mutex_lock(&ggtt->vm.mutex); list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); - mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, @@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915, i915_gem_object_put(obj); } - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static int igt_evict_something(void *arg) @@ -148,10 +141,12 @@ static int igt_evict_something(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -161,10 +156,12 @@ static int igt_evict_something(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict something */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { 
pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", err); @@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict the node */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_for_node returned err=%d\n", err); @@ -274,12 +275,14 @@ static int igt_evict_for_cache_color(void *arg) LIST_HEAD(objects); int err; - /* Currently the use of color_adjust is limited to cache domains within - * the ggtt, and so the presence of mm.color_adjust is assumed to be - * i915_gtt_color_adjust throughout our driver, so using a mock color - * adjust will work just fine for our purposes. + /* + * Currently the use of color_adjust for the GGTT is limited to cache + * coloring and guard pages, and so the presence of mm.color_adjust for + * the GGTT is assumed to be i915_ggtt_color_adjust, hence using a mock + * color adjust will work just fine for our purposes. */ ggtt->vm.mm.color_adjust = mock_color_adjust; + GEM_BUG_ON(!i915_vm_has_cache_coloring(&ggtt->vm)); obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { @@ -317,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg) i915_vma_unpin(vma); /* Remove just the second vma */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); goto cleanup; @@ -328,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg) */ target.color = I915_CACHE_L3_LLC; + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (!err) { pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); err = -EINVAL; @@ -358,7 +365,9 @@ static int igt_evict_vm(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -367,7 +376,9 @@ static int igt_evict_vm(void *arg) unpin_ggtt(i915); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -408,11 +419,11 @@ static int igt_evict_contexts(void *arg) if (!HAS_FULL_PPGTT(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); + mutex_lock(&i915->ggtt.vm.mutex); err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, i915->ggtt.vm.total, @@ -425,7 +436,9 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; + mutex_unlock(&i915->ggtt.vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); + mutex_lock(&i915->ggtt.vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; @@ -445,7 +458,7 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&i915->ggtt.vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. 
*/ @@ -460,7 +473,6 @@ static int igt_evict_contexts(void *arg) } count = 0; - mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { struct i915_request *rq; @@ -478,8 +490,8 @@ static int igt_evict_contexts(void *arg) if (IS_ERR(rq)) { /* When full, fail_if_busy will trigger EBUSY */ if (PTR_ERR(rq) != -EBUSY) { - pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", - ctx->hw_id, engine->name, + pr_err("Unexpected error from request alloc (on %s): %d\n", + engine->name, (int)PTR_ERR(rq)); err = PTR_ERR(rq); } @@ -497,8 +509,6 @@ static int igt_evict_contexts(void *arg) count++; err = 0; } while(1); - mutex_unlock(&i915->drm.struct_mutex); - onstack_fence_fini(&fence); pr_info("Submitted %lu contexts/requests on %s\n", count, engine->name); @@ -508,9 +518,9 @@ static int igt_evict_contexts(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&i915->ggtt.vm.mutex); out_locked: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; while (reserved) { struct reserved *next = reserved->next; @@ -522,8 +532,8 @@ out_locked: } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -545,12 +555,9 @@ int i915_gem_evict_mock_selftests(void) if (!i915) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 31a51ca1ddcb..ebe735df6504 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -25,26 +25,20 @@ #include <linux/list_sort.h> #include <linux/prime_numbers.h> +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" +#include "gt/intel_context.h" #include "i915_random.h" #include "i915_selftest.h" #include "mock_drm.h" #include "mock_gem_device.h" +#include "igt_flush_test.h" static void cleanup_freed_objects(struct drm_i915_private *i915) { - /* - * As we may hold onto the struct_mutex for inordinate lengths of - * time, the NMI khungtaskd detector may fire for the free objects - * worker. 
- */ - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static void fake_free_pages(struct drm_i915_gem_object *obj, @@ -88,8 +82,6 @@ static int fake_get_pages(struct drm_i915_gem_object *obj) } GEM_BUG_ON(rem); - obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_set_pages(obj, pages, sg_page_sizes); return 0; @@ -101,7 +93,6 @@ static void fake_put_pages(struct drm_i915_gem_object *obj, { fake_free_pages(obj, pages); obj->mm.dirty = false; - obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops fake_ops = { @@ -128,6 +119,8 @@ fake_dma_object(struct drm_i915_private *i915, u64 size) drm_gem_private_object_init(&i915->drm, &obj->base, size); i915_gem_object_init(obj, &fake_ops); + i915_gem_object_set_volatile(obj); + obj->write_domain = I915_GEM_DOMAIN_CPU; obj->read_domains = I915_GEM_DOMAIN_CPU; obj->cache_level = I915_CACHE_NONE; @@ -293,18 +286,20 @@ static int lowlevel_hole(struct drm_i915_private *i915, mock_vma.node.size = BIT_ULL(size); mock_vma.node.start = addr; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0); - intel_runtime_pm_put(&i915->runtime_pm, wakeref); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + vm->insert_entries(vm, &mock_vma, + I915_CACHE_NONE, 0); } count = n; i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { u64 addr = hole_start + order[n] * BIT_ULL(size); + intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); - vm->clear_range(vm, addr, BIT_ULL(size)); + with_intel_runtime_pm(&i915->runtime_pm, wakeref) + vm->clear_range(vm, addr, BIT_ULL(size)); } i915_gem_object_unpin_pages(obj); @@ -875,6 +870,15 @@ static int __shrink_hole(struct drm_i915_private *i915, i915_vma_unpin(vma); addr += size; + /* + * Since we are injecting allocation faults at random intervals, + * wait for this allocation to complete before we change the + * faultinjection. 
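Several hunks in this file swap explicit intel_runtime_pm_get()/intel_runtime_pm_put() pairs for the scoped with_intel_runtime_pm() form, which takes the wakeref on entry to the controlled statement and drops it on exit. A tiny userspace model of that "with_" idiom, using a pthread mutex as the stand-in resource (the macro and names are illustrative, not driver API):

#include <pthread.h>
#include <stdio.h>

/* Acquire on entry, release when the controlled statement is left. */
#define with_mutex(m) \
        for (int _once = (pthread_mutex_lock(m), 1); _once; \
             _once = (pthread_mutex_unlock(m), 0))

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int counter;

int main(void)
{
        with_mutex(&lock)
                counter++;      /* body runs exactly once, with the lock held */

        printf("counter = %d\n", counter);
        return 0;
}

The trade-off of the idiom is that leaving the block with break or goto would skip the release, so the guarded bodies are kept to a single statement, as in lowlevel_hole() above.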
+ */ + err = i915_vma_sync(vma); + if (err) + break; + if (igt_timeout(end_time, "%s timed out at ofset %llx [%llx - %llx]\n", __func__, addr, hole_start, hole_end)) { @@ -1008,21 +1012,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); - goto out_unlock; + goto out_free; } GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); - GEM_BUG_ON(ppgtt->vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); i915_vm_put(&ppgtt->vm); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); +out_free: mock_file_free(dev_priv, file); return err; } @@ -1085,7 +1087,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, IGT_TIMEOUT(end_time); int err = 0; - mutex_lock(&i915->drm.struct_mutex); restart: list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes); drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) { @@ -1106,7 +1107,6 @@ restart: last = hole_end; goto restart; } - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1148,13 +1148,9 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); err = i915_gem_object_pin_pages(obj); if (err) @@ -1222,8 +1218,6 @@ out_unpin: i915_gem_object_unpin_pages(obj); out_free: i915_gem_object_put(obj); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1234,10 +1228,13 @@ static void track_vma_bind(struct i915_vma *vma) atomic_inc(&obj->bind_count); /* track for eviction later */ __i915_gem_object_pin_pages(obj); + GEM_BUG_ON(vma->pages); + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); } @@ -1248,6 +1245,7 @@ static int exercise_mock(struct drm_i915_private *i915, unsigned long end_time)) { const u64 limit = totalram_pages() << PAGE_SHIFT; + struct i915_address_space *vm; struct i915_gem_context *ctx; IGT_TIMEOUT(end_time); int err; @@ -1256,7 +1254,9 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time); + vm = i915_gem_context_get_vm_rcu(ctx); + err = func(i915, vm, 0, min(vm->total, limit), end_time); + i915_vm_put(vm); mock_context_close(ctx); return err; @@ -1294,6 +1294,7 @@ static int igt_gtt_reserve(void *arg) { struct i915_ggtt *ggtt = arg; struct drm_i915_gem_object *obj, *on; + I915_RND_STATE(prng); LIST_HEAD(objects); u64 total; int err = -ENODEV; @@ -1330,11 +1331,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1380,11 +1383,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) 
{ pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1420,15 +1425,18 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, ggtt->vm.total, - 2*I915_GTT_PAGE_SIZE, - I915_GTT_MIN_ALIGNMENT); + offset = igt_random_offset(&prng, + 0, ggtt->vm.total, + 2 * I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1497,11 +1505,13 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", ii->size, ii->alignment, ii->start, ii->end, @@ -1537,10 +1547,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err == -ENOSPC) { /* maxed out the GGTT space */ i915_gem_object_put(obj); @@ -1595,10 +1607,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1642,10 +1656,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1689,13 +1705,10 @@ int i915_gem_gtt_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); + mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -1703,6 +1716,312 @@ out_put: return err; } +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + long timeout; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + timeout = i915_request_wait(rq, 0, HZ / 5); + i915_request_put(rq); + + return timeout < 0 ? -EIO : 0; +} + +static struct i915_request * +submit_batch(struct intel_context *ce, u64 addr) +{ + struct i915_request *rq; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + err = 0; + if (rq->engine->emit_init_breadcrumb) /* detect a hang */ + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = rq->engine->emit_bb_start(rq, addr, 0, 0); + + if (err == 0) + i915_request_get(rq); + i915_request_add(rq); + + return err ? 
ERR_PTR(err) : rq; +} + +static u32 *spinner(u32 *batch, int i) +{ + return batch + i * 64 / sizeof(*batch) + 4; +} + +static void end_spin(u32 *batch, int i) +{ + *spinner(batch, i) = MI_BATCH_BUFFER_END; + wmb(); +} + +static int igt_cs_tlb(void *arg) +{ + const unsigned int count = PAGE_SIZE / 64; + const unsigned int chunk_size = count * PAGE_SIZE; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *bbe, *act, *out; + struct i915_gem_engines_iter it; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct drm_file *file; + struct i915_vma *vma; + I915_RND_STATE(prng); + unsigned int i; + u32 *result; + u32 *batch; + int err = 0; + + /* + * Our mission here is to fool the hardware to execute something + * from scratch as it has not seen the batch move (due to missing + * the TLB invalidate). + */ + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = live_context(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + vm = i915_gem_context_get_vm_rcu(ctx); + if (i915_is_ggtt(vm)) + goto out_vm; + + /* Create two pages; dummy we prefill the TLB, and intended */ + bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(bbe)) { + err = PTR_ERR(bbe); + goto out_vm; + } + + batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_bbe; + } + memset32(batch, MI_BATCH_BUFFER_END, PAGE_SIZE / sizeof(u32)); + i915_gem_object_flush_map(bbe); + i915_gem_object_unpin_map(bbe); + + act = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(act)) { + err = PTR_ERR(act); + goto out_put_bbe; + } + + /* Track the execution of each request by writing into different slot */ + batch = i915_gem_object_pin_map(act, I915_MAP_WC); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_put_act; + } + for (i = 0; i < count; i++) { + u32 *cs = batch + i * 64 / sizeof(*cs); + u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32); + + GEM_BUG_ON(INTEL_GEN(i915) < 6); + cs[0] = MI_STORE_DWORD_IMM_GEN4; + if (INTEL_GEN(i915) >= 8) { + cs[1] = lower_32_bits(addr); + cs[2] = upper_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START_GEN8; + } else { + cs[1] = 0; + cs[2] = lower_32_bits(addr); + cs[3] = i; + cs[4] = MI_NOOP; + cs[5] = MI_BATCH_BUFFER_START; + } + } + + out = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(out)) { + err = PTR_ERR(out); + goto out_put_batch; + } + i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED); + + vma = i915_vma_instance(out, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_put_batch; + } + + err = i915_vma_pin(vma, 0, 0, + PIN_USER | + PIN_OFFSET_FIXED | + (vm->total - PAGE_SIZE)); + if (err) + goto out_put_out; + GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE); + + result = i915_gem_object_pin_map(out, I915_MAP_WB); + if (IS_ERR(result)) { + err = PTR_ERR(result); + goto out_put_out; + } + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + IGT_TIMEOUT(end_time); + unsigned long pass = 0; + + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + while (!__igt_timeout(end_time, NULL)) { + struct i915_request *rq; + u64 offset; + + offset = igt_random_offset(&prng, + 0, vm->total - PAGE_SIZE, + chunk_size, PAGE_SIZE); + + err = vm->allocate_va_range(vm, offset, chunk_size); + if (err) + goto end; + + memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + + vma = i915_vma_instance(bbe, vm, 
NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Prime the TLB with the dummy pages */ + for (i = 0; i < count; i++) { + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + rq = submit_batch(ce, vma->node.start); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + i915_request_put(rq); + } + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: dummy setup timed out\n", + ce->engine->name); + goto end; + } + + vma = i915_vma_instance(act, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto end; + } + + err = vma->ops->set_pages(vma); + if (err) + goto end; + + /* Replace the TLB with target batches */ + for (i = 0; i < count; i++) { + struct i915_request *rq; + u32 *cs = batch + i * 64 / sizeof(*cs); + u64 addr; + + vma->node.start = offset + i * PAGE_SIZE; + vm->insert_entries(vm, vma, I915_CACHE_NONE, 0); + + addr = vma->node.start + i * 64; + cs[4] = MI_NOOP; + cs[6] = lower_32_bits(addr); + cs[7] = upper_32_bits(addr); + wmb(); + + rq = submit_batch(ce, addr); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end; + } + + /* Wait until the context chain has started */ + if (i == 0) { + while (READ_ONCE(result[i]) && + !i915_request_completed(rq)) + cond_resched(); + } else { + end_spin(batch, i - 1); + } + + i915_request_put(rq); + } + end_spin(batch, count - 1); + + vma->ops->clear_pages(vma); + + err = context_sync(ce); + if (err) { + pr_err("%s: writes timed out\n", + ce->engine->name); + goto end; + } + + for (i = 0; i < count; i++) { + if (result[i] != i) { + pr_err("%s: Write lost on pass %lu, at offset %llx, index %d, found %x, expected %x\n", + ce->engine->name, pass, + offset, i, result[i], i); + err = -EINVAL; + goto end; + } + } + + vm->clear_range(vm, offset, chunk_size); + pass++; + } + } +end: + if (igt_flush_test(i915)) + err = -EIO; + i915_gem_context_unlock_engines(ctx); + i915_gem_object_unpin_map(out); +out_put_out: + i915_gem_object_put(out); +out_put_batch: + i915_gem_object_unpin_map(act); +out_put_act: + i915_gem_object_put(act); +out_put_bbe: + i915_gem_object_put(bbe); +out_vm: + i915_vm_put(vm); +out_unlock: + mock_file_free(i915, file); + return err; +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -1720,6 +2039,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), + SUBTEST(igt_cs_tlb), }; GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total)); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 1ccf0f731ac0..6daf6599ec79 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,6 +15,8 @@ selftest(workarounds, intel_workarounds_live_selftests) selftest(gt_engines, intel_engine_live_selftests) selftest(gt_timelines, intel_timeline_live_selftests) selftest(gt_contexts, intel_context_live_selftests) +selftest(gt_lrc, intel_lrc_live_selftests) +selftest(gt_pm, intel_gt_pm_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) @@ -33,3 +35,4 @@ selftest(reset, intel_reset_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) 
selftest(guc, intel_guc_live_selftest) +selftest(perf, i915_perf_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index b88084fe3269..aa5a0e7f5d9e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -26,3 +26,4 @@ selftest(gtt, i915_gem_gtt_mock_selftests) selftest(hugepages, i915_gem_huge_page_mock_selftests) selftest(contexts, i915_gem_context_mock_selftests) selftest(buddy, i915_buddy_mock_selftests) +selftest(memory_region, intel_memory_region_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c new file mode 100644 index 000000000000..dc6d689e4251 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -0,0 +1,216 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include <linux/kref.h> + +#include "gem/i915_gem_pm.h" +#include "gt/intel_gt.h" + +#include "i915_selftest.h" + +#include "igt_flush_test.h" +#include "lib_sw_fence.h" + +static struct i915_perf_stream * +test_stream(struct i915_perf *perf) +{ + struct drm_i915_perf_open_param param = {}; + struct perf_open_properties props = { + .engine = intel_engine_lookup_user(perf->i915, + I915_ENGINE_CLASS_RENDER, + 0), + .sample_flags = SAMPLE_OA_REPORT, + .oa_format = I915_OA_FORMAT_C4_B8, + .metrics_set = 1, + }; + struct i915_perf_stream *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + return NULL; + + stream->perf = perf; + + mutex_lock(&perf->lock); + if (i915_oa_stream_init(stream, ¶m, &props)) { + kfree(stream); + stream = NULL; + } + mutex_unlock(&perf->lock); + + return stream; +} + +static void stream_destroy(struct i915_perf_stream *stream) +{ + struct i915_perf *perf = stream->perf; + + mutex_lock(&perf->lock); + i915_perf_destroy_locked(stream); + mutex_unlock(&perf->lock); +} + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + + /* Quick check we can create a perf stream */ + + stream = test_stream(&i915->perf); + if (!stream) + return -EINVAL; + + stream_destroy(stream); + return 0; +} + +static int write_timestamp(struct i915_request *rq, int slot) +{ + u32 *cs; + int len; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + len = 5; + if (INTEL_GEN(rq->i915) >= 8) + len++; + + *cs++ = GFX_OP_PIPE_CONTROL(len); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_STORE_DATA_INDEX | + PIPE_CONTROL_WRITE_TIMESTAMP; + *cs++ = slot * sizeof(u32); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static ktime_t poll_status(struct i915_request *rq, int slot) +{ + while (!intel_read_status_page(rq->engine, slot) && + !i915_request_completed(rq)) + cpu_relax(); + + return ktime_get(); +} + +static int live_noa_delay(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_perf_stream *stream; + struct i915_request *rq; + ktime_t t0, t1; + u64 expected; + u32 delay; + int err; + int i; + + /* Check that the GPU delays matches expectations */ + + stream = test_stream(&i915->perf); + if (!stream) + return -ENOMEM; + + expected = atomic64_read(&stream->perf->noa_programming_delay); + + if (stream->engine->class != RENDER_CLASS) { + err = -ENODEV; + goto out; + } + + for (i = 0; i < 4; i++) + intel_write_status_page(stream->engine, 0x100 + i, 0); + + rq = 
i915_request_create(stream->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb && + i915_request_timeline(rq)->has_initial_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + err = write_timestamp(rq, 0x100); + if (err) { + i915_request_add(rq); + goto out; + } + + err = rq->engine->emit_bb_start(rq, + i915_ggtt_offset(stream->noa_wait), 0, + I915_DISPATCH_SECURE); + if (err) { + i915_request_add(rq); + goto out; + } + + err = write_timestamp(rq, 0x102); + if (err) { + i915_request_add(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + preempt_disable(); + t0 = poll_status(rq, 0x100); + t1 = poll_status(rq, 0x102); + preempt_enable(); + + pr_info("CPU delay: %lluns, expected %lluns\n", + ktime_sub(t1, t0), expected); + + delay = intel_read_status_page(stream->engine, 0x102); + delay -= intel_read_status_page(stream->engine, 0x100); + delay = div_u64(mul_u32_u32(delay, 1000 * 1000), + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + pr_info("GPU delay: %uns, expected %lluns\n", + delay, expected); + + if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { + pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", + delay / 1000, + div_u64(3 * expected, 4000), + div_u64(3 * expected, 2000)); + err = -EINVAL; + } + + i915_request_put(rq); +out: + stream_destroy(stream); + return err; +} + +int i915_perf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_noa_delay), + }; + struct i915_perf *perf = &i915->perf; + + if (!perf->metrics_kobj || !perf->ops.enable_metric_set) + return 0; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 716a3f19f030..abdfadcf626b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -29,6 +29,7 @@ #include <linux/types.h> #include "i915_random.h" +#include "i915_utils.h" u64 i915_prandom_u64_state(struct rnd_state *rnd) { @@ -87,3 +88,22 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) i915_random_reorder(order, count, state); return order; } + +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align) +{ + u64 range, addr; + + BUG_ON(range_overflows(start, len, end)); + BUG_ON(round_up(start, align) > round_down(end - len, align)); + + range = round_down(end - len, align) - round_up(start, align); + if (range) { + addr = i915_prandom_u64_state(state); + div64_u64_rem(addr, range, &addr); + start += addr; + } + + return round_up(start, align); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h index 8e1ff9c105b6..35cc69a3a1b9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.h +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -57,4 +57,8 @@ void i915_random_reorder(unsigned int *order, void i915_prandom_shuffle(void *arr, size_t elsz, size_t count, struct rnd_state *state); +u64 igt_random_offset(struct rnd_state *state, + u64 start, u64 end, + u64 len, u64 align); + #endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index b3688543ed7d..30ae34f62176 100644 
--- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -37,25 +37,32 @@ #include "mock_drm.h" #include "mock_gem_device.h" +static unsigned int num_uabi_engines(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + unsigned int count; + + count = 0; + for_each_uabi_engine(engine, i915) + count++; + + return count; +} + static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; struct i915_request *request; - int err = -ENOMEM; /* Basic preliminary test to create a request and let it loose! */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); if (!request) - goto out_unlock; + return -ENOMEM; i915_request_add(request); - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int igt_wait_request(void *arg) @@ -67,12 +74,10 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } + if (!request) + return -ENOMEM; + i915_request_get(request); if (i915_request_wait(request, 0, 0) != -ETIME) { @@ -125,9 +130,7 @@ static int igt_wait_request(void *arg) err = 0; out_request: i915_request_put(request); -out_unlock: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -140,52 +143,45 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } + if (!request) + return -ENOMEM; if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_locked; + goto out; } i915_request_add(request); - mutex_unlock(&i915->drm.struct_mutex); if (dma_fence_is_signaled(&request->fence)) { pr_err("fence signaled immediately!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out (expected success)!\n"); - goto out_device; + goto out; } if (!dma_fence_is_signaled(&request->fence)) { pr_err("fence unsignaled after waiting!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out when complete (expected success)!\n"); - goto out_device; + goto out; } err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -197,8 +193,8 @@ static int igt_request_rewind(void *arg) struct intel_context *ce; int err = -EINVAL; - mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); + ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); @@ -212,6 +208,7 @@ static int igt_request_rewind(void *arg) i915_request_add(request); ctx[1] = mock_context(i915, "B"); + ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); @@ -233,7 +230,6 @@ static int igt_request_rewind(void *arg) request->engine->submit_request(request); rcu_read_unlock(); - 
mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { pr_err("timed out waiting for high priority request\n"); @@ -248,14 +244,12 @@ static int igt_request_rewind(void *arg) err = 0; err: i915_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); err_context_1: mock_context_close(ctx[1]); i915_request_put(request); err_context_0: mock_context_close(ctx[0]); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -282,7 +276,6 @@ __live_request_alloc(struct intel_context *ce) static int __igt_breadcrumbs_smoketest(void *arg) { struct smoketest *t = arg; - struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; const unsigned int total = 4 * t->ncontexts + 1; unsigned int num_waits = 0, num_fences = 0; @@ -300,7 +293,7 @@ static int __igt_breadcrumbs_smoketest(void *arg) * that the fences were marked as signaled. */ - requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL); + requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); if (!requests) return -ENOMEM; @@ -337,14 +330,11 @@ static int __igt_breadcrumbs_smoketest(void *arg) struct i915_request *rq; struct intel_context *ce; - mutex_lock(BKL); - ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); rq = t->request_alloc(ce); intel_context_put(ce); if (IS_ERR(rq)) { - mutex_unlock(BKL); err = PTR_ERR(rq); count = n; break; @@ -357,8 +347,6 @@ static int __igt_breadcrumbs_smoketest(void *arg) requests[n] = i915_request_get(rq); i915_request_add(rq); - mutex_unlock(BKL); - if (err >= 0) err = i915_sw_fence_await_dma_fence(wait, &rq->fence, @@ -446,18 +434,16 @@ static int mock_breadcrumbs_smoketest(void *arg) * See __igt_breadcrumbs_smoketest(); */ - threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); if (!threads) return -ENOMEM; - t.contexts = - kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); + t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); if (!t.contexts) { ret = -ENOMEM; goto out_threads; } - mutex_lock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < t.ncontexts; n++) { t.contexts[n] = mock_context(t.engine->i915, "mock"); if (!t.contexts[n]) { @@ -465,7 +451,6 @@ static int mock_breadcrumbs_smoketest(void *arg) goto out_contexts; } } - mutex_unlock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < ncpus; n++) { threads[n] = kthread_run(__igt_breadcrumbs_smoketest, @@ -495,18 +480,15 @@ static int mock_breadcrumbs_smoketest(void *arg) atomic_long_read(&t.num_fences), ncpus); - mutex_lock(&t.engine->i915->drm.struct_mutex); out_contexts: for (n = 0; n < t.ncontexts; n++) { if (!t.contexts[n]) break; mock_context_close(t.contexts[n]); } - mutex_unlock(&t.engine->i915->drm.struct_mutex); kfree(t.contexts); out_threads: kfree(threads); - return ret; } @@ -539,40 +521,37 @@ static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - unsigned int id; int err = -ENODEV; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - for_each_engine(engine, i915, id) { - struct i915_request *request = NULL; + for_each_uabi_engine(engine, i915) { unsigned long n, prime; IGT_TIMEOUT(end_time); ktime_t times[2] = {}; err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + return err; for_each_prime_number_from(prime, 1, 8192) { + struct i915_request *request = NULL; + times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = i915_request_create(engine->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } + if (IS_ERR(request)) + return PTR_ERR(request); - /* This space is left intentionally blank. + /* + * This space is left intentionally blank. * * We do not actually want to perform any * action with this request, we just want @@ -585,9 +564,11 @@ static int live_nop_request(void *arg) * for latency. */ + i915_request_get(request); i915_request_add(request); } i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 1) @@ -599,7 +580,7 @@ static int live_nop_request(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + return err; pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -607,9 +588,6 @@ static int live_nop_request(void *arg) prime, div64_u64(ktime_to_ns(times[1]), prime)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -647,8 +625,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; + /* Force the wait wait now to avoid including it in the benchmark */ + err = i915_vma_sync(vma); + if (err) + goto err_pin; + return vma; +err_pin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ERR_PTR(err); @@ -672,6 +657,7 @@ empty_request(struct intel_engine_cs *engine, if (err) goto out_request; + i915_request_get(request); out_request: i915_request_add(request); return err ? ERR_PTR(err) : request; @@ -681,27 +667,21 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; int err = 0; - /* Submit various sized batches of empty requests, to each engine + /* + * Submit various sized batches of empty requests, to each engine * (individually), and wait for the batch to complete. We can check * the overhead of submitting requests to the hardware. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } + if (IS_ERR(batch)) + return PTR_ERR(batch); - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { IGT_TIMEOUT(end_time); struct i915_request *request; unsigned long n, prime; @@ -723,6 +703,7 @@ static int live_empty_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = empty_request(engine, batch); if (IS_ERR(request)) { err = PTR_ERR(request); @@ -738,6 +719,7 @@ static int live_empty_request(void *arg) if (__igt_timeout(end_time, NULL)) break; } + i915_request_put(request); err = igt_live_test_end(&t); if (err) @@ -752,18 +734,15 @@ static int live_empty_request(void *arg) out_batch: i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; const int gen = INTEL_GEN(i915); + struct i915_address_space *vm; struct i915_vma *vma; u32 *cmd; int err; @@ -772,7 +751,9 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); + vm = i915_gem_context_get_vm_rcu(ctx); vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -832,67 +813,73 @@ static int recursive_batch_resolve(struct i915_vma *batch) static int live_all_engines(void *arg) { struct drm_i915_private *i915 = arg; + const unsigned int nengines = num_uabi_engines(i915); struct intel_engine_cs *engine; - struct i915_request *request[I915_NUM_ENGINES]; - intel_wakeref_t wakeref; + struct i915_request **request; struct igt_live_test t; struct i915_vma *batch; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines simultaneously. We + /* + * Check we can submit requests to all engines simultaneously. We * send a recursive batch to each engine - checking that we don't * block doing so, and that they don't complete too soon. 
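recursive_batch(), used by the tests below, builds a batch that branches back to itself with MI_BATCH_BUFFER_START, so each request keeps spinning on the GPU until recursive_batch_resolve() (or the cleanup path shown at the end of live_sequential_engines()) pokes MI_BATCH_BUFFER_END over the loop. A userspace model of that spin-until-resolved handshake, with a thread polling a shared word in place of the GPU (purely illustrative, not driver code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Stand-ins for the two commands the selftest writes into the batch. */
enum { CMD_LOOP, CMD_END };

static _Atomic int batch = CMD_LOOP;

static void *engine(void *arg)
{
        unsigned long spins = 0;

        /* Model of the GPU replaying MI_BATCH_BUFFER_START on itself. */
        while (atomic_load(&batch) == CMD_LOOP)
                spins++;

        printf("batch resolved after %lu spins\n", spins);
        return NULL;
}

int main(void)
{
        pthread_t thread;

        pthread_create(&thread, NULL, engine, NULL);

        /* recursive_batch_resolve(): replace the loop with an END command. */
        atomic_store(&batch, CMD_END);

        pthread_join(thread, NULL);
        return 0;
}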
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_free; batch = recursive_batch(i915); if (IS_ERR(batch)) { err = PTR_ERR(batch); pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; + goto out_free; } - for_each_engine(engine, i915, id) { - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + request[idx] = i915_request_create(engine->kernel_context); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed with err=%d\n", __func__, err); goto out_request; } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_request_await_object(request[id], batch->obj, 0); + err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); + idx++; } - for_each_engine(engine, i915, id) { - if (i915_request_completed(request[id])) { + idx = 0; + for_each_uabi_engine(engine, i915) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } + idx++; } err = recursive_batch_resolve(batch); @@ -901,10 +888,11 @@ static int live_all_engines(void *arg) goto out_request; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -913,50 +901,56 @@ static int live_all_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); - i915_request_put(request[id]); - request[id] = NULL; + GEM_BUG_ON(!i915_request_completed(request[idx])); + i915_request_put(request[idx]); + request[idx] = NULL; + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) - if (request[id]) - i915_request_put(request[id]); + idx = 0; + for_each_uabi_engine(engine, i915) { + if (request[idx]) + i915_request_put(request[idx]); + idx++; + } i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +out_free: + kfree(request); return err; } static int live_sequential_engines(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_request *request[I915_NUM_ENGINES] = {}; + const unsigned int nengines = num_uabi_engines(i915); + struct i915_request **request; struct i915_request *prev = NULL; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; - unsigned int id; + unsigned int idx; int err; - /* Check we can submit requests to all engines sequentially, such + /* + * Check we can submit requests to all engines sequentially, such * that each successive request waits for the 
earlier ones. This * tests that we don't execute requests out of order, even though * they are running on independent engines. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); + request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); + if (!request) + return -ENOMEM; err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_free; - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { struct i915_vma *batch; batch = recursive_batch(i915); @@ -964,66 +958,69 @@ static int live_sequential_engines(void *arg) err = PTR_ERR(batch); pr_err("%s: Unable to create batch for %s, err=%d\n", __func__, engine->name, err); - goto out_unlock; + goto out_free; } - request[id] = i915_request_create(engine->kernel_context); - if (IS_ERR(request[id])) { - err = PTR_ERR(request[id]); + request[idx] = i915_request_create(engine->kernel_context); + if (IS_ERR(request[idx])) { + err = PTR_ERR(request[idx]); pr_err("%s: Request allocation failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } if (prev) { - err = i915_request_await_dma_fence(request[id], + err = i915_request_await_dma_fence(request[idx], &prev->fence); if (err) { - i915_request_add(request[id]); + i915_request_add(request[idx]); pr_err("%s: Request await failed for %s with err=%d\n", __func__, engine->name, err); goto out_request; } } - err = engine->emit_bb_start(request[id], + err = engine->emit_bb_start(request[idx], batch->node.start, batch->node.size, 0); GEM_BUG_ON(err); - request[id]->batch = batch; + request[idx]->batch = batch; i915_vma_lock(batch); - err = i915_request_await_object(request[id], batch->obj, false); + err = i915_request_await_object(request[idx], + batch->obj, false); if (err == 0) - err = i915_vma_move_to_active(batch, request[id], 0); + err = i915_vma_move_to_active(batch, request[idx], 0); i915_vma_unlock(batch); GEM_BUG_ON(err); - i915_request_get(request[id]); - i915_request_add(request[id]); + i915_request_get(request[idx]); + i915_request_add(request[idx]); - prev = request[id]; + prev = request[idx]; + idx++; } - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { long timeout; - if (i915_request_completed(request[id])) { + if (i915_request_completed(request[idx])) { pr_err("%s(%s): request completed too early!\n", __func__, engine->name); err = -EINVAL; goto out_request; } - err = recursive_batch_resolve(request[id]->batch); + err = recursive_batch_resolve(request[idx]->batch); if (err) { pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); goto out_request; } - timeout = i915_request_wait(request[id], 0, + timeout = i915_request_wait(request[idx], 0, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) { err = timeout; @@ -1032,33 +1029,154 @@ static int live_sequential_engines(void *arg) goto out_request; } - GEM_BUG_ON(!i915_request_completed(request[id])); + GEM_BUG_ON(!i915_request_completed(request[idx])); + idx++; } err = igt_live_test_end(&t); out_request: - for_each_engine(engine, i915, id) { + idx = 0; + for_each_uabi_engine(engine, i915) { u32 *cmd; - if (!request[id]) + if (!request[idx]) break; - cmd = i915_gem_object_pin_map(request[id]->batch->obj, + cmd = i915_gem_object_pin_map(request[idx]->batch->obj, I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; intel_gt_chipset_flush(engine->gt); - i915_gem_object_unpin_map(request[id]->batch->obj); + i915_gem_object_unpin_map(request[idx]->batch->obj); } - 
i915_vma_put(request[id]->batch); - i915_request_put(request[id]); + i915_vma_put(request[idx]->batch); + i915_request_put(request[idx]); + idx++; } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +out_free: + kfree(request); + return err; +} + +static int __live_parallel_engine1(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq; + int err; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + return err; + + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu request + sync\n", engine->name, count); + return 0; +} + +static int __live_parallel_engineN(void *arg) +{ + struct intel_engine_cs *engine = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + count++; + } while (!__igt_timeout(end_time, NULL)); + + pr_info("%s: %lu requests\n", engine->name, count); + return 0; +} + +static int live_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + __live_parallel_engine1, + __live_parallel_engineN, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct task_struct **tsk; + int err = 0; + + /* + * Check we can submit requests to all engines concurrently. This + * tests that we load up the system maximally. 
+ */ + + tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); + if (!tsk) + return -ENOMEM; + + for (fn = func; !err && *fn; fn++) { + struct igt_live_test t; + unsigned int idx; + + err = igt_live_test_begin(&t, i915, __func__, ""); + if (err) + break; + + idx = 0; + for_each_uabi_engine(engine, i915) { + tsk[idx] = kthread_run(*fn, engine, + "igt/parallel:%s", + engine->name); + if (IS_ERR(tsk[idx])) { + err = PTR_ERR(tsk[idx]); + break; + } + get_task_struct(tsk[idx++]); + } + + idx = 0; + for_each_uabi_engine(engine, i915) { + int status; + + if (IS_ERR(tsk[idx])) + break; + + status = kthread_stop(tsk[idx]); + if (status && !err) + err = status; + + put_task_struct(tsk[idx++]); + } + + if (igt_live_test_end(&t)) + err = -EIO; + } + + kfree(tsk); return err; } @@ -1102,16 +1220,16 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) static int live_breadcrumbs_smoketest(void *arg) { struct drm_i915_private *i915 = arg; - struct smoketest t[I915_NUM_ENGINES]; - unsigned int ncpus = num_online_cpus(); + const unsigned int nengines = num_uabi_engines(i915); + const unsigned int ncpus = num_online_cpus(); unsigned long num_waits, num_fences; struct intel_engine_cs *engine; struct task_struct **threads; struct igt_live_test live; - enum intel_engine_id id; intel_wakeref_t wakeref; struct drm_file *file; - unsigned int n; + struct smoketest *smoke; + unsigned int n, idx; int ret = 0; /* @@ -1130,29 +1248,31 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_rpm; } - threads = kcalloc(ncpus * I915_NUM_ENGINES, - sizeof(*threads), - GFP_KERNEL); - if (!threads) { + smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); + if (!smoke) { ret = -ENOMEM; goto out_file; } - memset(&t[0], 0, sizeof(t[0])); - t[0].request_alloc = __live_request_alloc; - t[0].ncontexts = 64; - t[0].contexts = kmalloc_array(t[0].ncontexts, - sizeof(*t[0].contexts), - GFP_KERNEL); - if (!t[0].contexts) { + threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); + if (!threads) { + ret = -ENOMEM; + goto out_smoke; + } + + smoke[0].request_alloc = __live_request_alloc; + smoke[0].ncontexts = 64; + smoke[0].contexts = kcalloc(smoke[0].ncontexts, + sizeof(*smoke[0].contexts), + GFP_KERNEL); + if (!smoke[0].contexts) { ret = -ENOMEM; goto out_threads; } - mutex_lock(&i915->drm.struct_mutex); - for (n = 0; n < t[0].ncontexts; n++) { - t[0].contexts[n] = live_context(i915, file); - if (!t[0].contexts[n]) { + for (n = 0; n < smoke[0].ncontexts; n++) { + smoke[0].contexts[n] = live_context(i915, file); + if (!smoke[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; } @@ -1162,45 +1282,47 @@ static int live_breadcrumbs_smoketest(void *arg) if (ret) goto out_contexts; - for_each_engine(engine, i915, id) { - t[id] = t[0]; - t[id].engine = engine; - t[id].max_batch = max_batches(t[0].contexts[0], engine); - if (t[id].max_batch < 0) { - ret = t[id].max_batch; - mutex_unlock(&i915->drm.struct_mutex); + idx = 0; + for_each_uabi_engine(engine, i915) { + smoke[idx] = smoke[0]; + smoke[idx].engine = engine; + smoke[idx].max_batch = + max_batches(smoke[0].contexts[0], engine); + if (smoke[idx].max_batch < 0) { + ret = smoke[idx].max_batch; goto out_flush; } /* One ring interleaved between requests from all cpus */ - t[id].max_batch /= num_online_cpus() + 1; + smoke[idx].max_batch /= num_online_cpus() + 1; pr_debug("Limiting batches to %d requests on %s\n", - t[id].max_batch, engine->name); + smoke[idx].max_batch, engine->name); for (n = 0; n < ncpus; n++) { struct task_struct *tsk; tsk = 
kthread_run(__igt_breadcrumbs_smoketest, - &t[id], "igt/%d.%d", id, n); + &smoke[idx], "igt/%d.%d", idx, n); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } get_task_struct(tsk); - threads[id * ncpus + n] = tsk; + threads[idx * ncpus + n] = tsk; } + + idx++; } - mutex_unlock(&i915->drm.struct_mutex); msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); out_flush: + idx = 0; num_waits = 0; num_fences = 0; - for_each_engine(engine, i915, id) { + for_each_uabi_engine(engine, i915) { for (n = 0; n < ncpus; n++) { - struct task_struct *tsk = threads[id * ncpus + n]; + struct task_struct *tsk = threads[idx * ncpus + n]; int err; if (!tsk) @@ -1213,19 +1335,20 @@ out_flush: put_task_struct(tsk); } - num_waits += atomic_long_read(&t[id].num_waits); - num_fences += atomic_long_read(&t[id].num_fences); + num_waits += atomic_long_read(&smoke[idx].num_waits); + num_fences += atomic_long_read(&smoke[idx].num_fences); + idx++; } pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); - mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; out_contexts: - mutex_unlock(&i915->drm.struct_mutex); - kfree(t[0].contexts); + kfree(smoke[0].contexts); out_threads: kfree(threads); +out_smoke: + kfree(smoke); out_file: mock_file_free(i915, file); out_rpm: @@ -1240,6 +1363,7 @@ int i915_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nop_request), SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), + SUBTEST(live_parallel_engines), SUBTEST(live_empty_request), SUBTEST(live_breadcrumbs_smoketest), }; diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 438ea0eaa416..825a8286cbe8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data) { struct drm_i915_private *i915 = data; - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); @@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data) { struct intel_gt *gt = data; - mutex_lock(>->i915->drm.struct_mutex); - if (igt_flush_test(gt->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; - mutex_unlock(>->i915->drm.struct_mutex); i915_gem_drain_freed_objects(gt->i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index a5bec0a4cdcc..58b5f40a07dd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,6 +24,7 @@ #include <linux/prime_numbers.h> +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "i915_scatterlist.h" @@ -38,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != ctx->vm) { + if (vma->vm != rcu_access_pointer(ctx->vm)) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -113,11 +114,13 @@ static int create_vmas(struct drm_i915_private *i915, list_for_each_entry(obj, objects, st_link) { for (pinned = 0; pinned <= 1; pinned++) { list_for_each_entry(ctx, contexts, link) { - struct i915_address_space *vm = ctx->vm; + struct i915_address_space *vm; struct i915_vma *vma; int err; + vm = i915_gem_context_get_vm_rcu(ctx); vma = 
checked_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -170,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) @@ -623,7 +626,7 @@ static bool assert_partial(struct drm_i915_gem_object *obj, struct sgt_iter sgt; dma_addr_t dma; - for_each_sgt_dma(dma, sgt, vma->pages) { + for_each_sgt_daddr(dma, sgt, vma->pages) { dma_addr_t src; if (!size) { @@ -831,13 +834,10 @@ int i915_vma_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); - mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); + mock_device_flush(i915); i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -879,8 +879,6 @@ static int igt_vma_remapped_gtt(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (t = types; *t; t++) { @@ -976,7 +974,6 @@ static int igt_vma_remapped_gtt(void *arg) out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_put(obj); return err; diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index d3b5eb402d33..7b0939e3f007 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,39 +4,32 @@ * Copyright © 2018 Intel Corporation */ -#include "gem/i915_gem_context.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_selftest.h" #include "igt_flush_test.h" -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +int igt_flush_test(struct drm_i915_private *i915) { - int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0; - int repeat = !!(flags & I915_WAIT_LOCKED); + struct intel_gt *gt = &i915->gt; + int ret = intel_gt_is_wedged(gt) ? -EIO : 0; cond_resched(); - do { - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - GEM_TRACE("%pS timed out.\n", - __builtin_return_address(0)); - GEM_TRACE_DUMP(); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); - repeat = 0; - ret = -EIO; - } - - /* Ensure we also flush after wedging. 
*/ - if (flags & I915_WAIT_LOCKED) - i915_retire_requests(i915); - } while (repeat--); + intel_gt_set_wedged(gt); + ret = -EIO; + } return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h index 63e009927c43..7541fa74e641 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -9,6 +9,6 @@ struct drm_i915_private; -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); +int igt_flush_test(struct drm_i915_private *i915); #endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 3e902761cd16..810b60100c2c 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "i915_drv.h" +#include "gt/intel_gt_requests.h" #include "../i915_selftest.h" #include "igt_flush_test.h" @@ -19,16 +20,11 @@ int igt_live_test_begin(struct igt_live_test *t, enum intel_engine_id id; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - t->i915 = i915; t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); @@ -50,9 +46,7 @@ int igt_live_test_end(struct igt_live_test *t) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) return -EIO; if (t->reset_global != i915_reset_count(&i915->gpu_error)) { diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c index 7ec8f8b049c6..9f8590b868a9 100644 --- a/drivers/gpu/drm/i915/selftests/igt_reset.c +++ b/drivers/gpu/drm/i915/selftests/igt_reset.c @@ -22,7 +22,7 @@ void igt_global_reset_lock(struct intel_gt *gt) wait_event(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, >->reset.flags)); - for_each_engine(engine, gt->i915, id) { + for_each_engine(engine, gt, id) { while (test_and_set_bit(I915_RESET_ENGINE + id, >->reset.flags)) wait_on_bit(>->reset.flags, I915_RESET_ENGINE + id, @@ -35,7 +35,7 @@ void igt_global_reset_unlock(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt->i915, id) + for_each_engine(engine, gt, id) clear_bit(I915_RESET_ENGINE + id, >->reset.flags); clear_bit(I915_RESET_BACKOFF, >->reset.flags); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 11f04ad48e68..ee8450b871da 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -147,7 +147,7 @@ igt_spinner_create_request(struct igt_spinner *spin, intel_gt_chipset_flush(engine->gt); if (engine->emit_init_breadcrumb && - rq->timeline->has_initial_breadcrumb) { + i915_request_timeline(rq)->has_initial_breadcrumb) { err = engine->emit_init_breadcrumb(rq); if (err) goto cancel_rq; diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c new file mode 100644 index 000000000000..56091e7e599e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: 
MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "mock_gem_device.h" +#include "mock_region.h" + +#include "gem/i915_gem_region.h" +#include "gem/selftests/mock_context.h" +#include "selftests/i915_random.h" + +static void close_objects(struct intel_memory_region *mem, + struct list_head *objects) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + if (i915_gem_object_has_pinned_pages(obj)) + i915_gem_object_unpin_pages(obj); + /* No polluting the memory region between tests */ + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); + } + + cond_resched(); + + i915_gem_drain_freed_objects(i915); +} + +static int igt_mock_fill(void *arg) +{ + struct intel_memory_region *mem = arg; + resource_size_t total = resource_size(&mem->region); + resource_size_t page_size; + resource_size_t rem; + unsigned long max_pages; + unsigned long page_num; + LIST_HEAD(objects); + int err = 0; + + page_size = mem->mm.chunk_size; + max_pages = div64_u64(total, page_size); + rem = total; + + for_each_prime_number_from(page_num, 1, max_pages) { + resource_size_t size = page_num * page_size; + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_create_region(mem, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + break; + } + + list_add(&obj->st_link, &objects); + rem -= size; + } + + if (err == -ENOMEM) + err = 0; + if (err == -ENXIO) { + if (page_num * page_size <= rem) { + pr_err("%s failed, space still left in region\n", + __func__); + err = -EINVAL; + } else { + err = 0; + } + } + + close_objects(mem, &objects); + + return err; +} + +static struct drm_i915_gem_object * +igt_object_create(struct intel_memory_region *mem, + struct list_head *objects, + u64 size, + unsigned int flags) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mem, size, flags); + if (IS_ERR(obj)) + return obj; + + err = i915_gem_object_pin_pages(obj); + if (err) + goto put; + + list_add(&obj->st_link, objects); + return obj; + +put: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static void igt_object_release(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); + list_del(&obj->st_link); + i915_gem_object_put(obj); +} + +static int igt_mock_contiguous(void *arg) +{ + struct intel_memory_region *mem = arg; + struct drm_i915_gem_object *obj; + unsigned long n_objects; + LIST_HEAD(objects); + LIST_HEAD(holes); + I915_RND_STATE(prng); + resource_size_t total; + resource_size_t min; + u64 target; + int err = 0; + + total = resource_size(&mem->region); + + /* Min size */ + obj = igt_object_create(mem, &objects, mem->mm.chunk_size, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s min object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* Max size */ + obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->mm.pages->nents != 1) { + pr_err("%s max object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + 
igt_object_release(obj); + + /* Internal fragmentation should not bleed into the object size */ + target = i915_prandom_u64_state(&prng); + div64_u64_rem(target, total, &target); + target = round_up(target, PAGE_SIZE); + target = max_t(u64, PAGE_SIZE, target); + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + if (obj->base.size != target) { + pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, + obj->base.size, target); + err = -EINVAL; + goto err_close_objects; + } + + if (obj->mm.pages->nents != 1) { + pr_err("%s object spans multiple sg entries\n", __func__); + err = -EINVAL; + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Try to fragment the address space, such that half of it is free, but + * the max contiguous block size is SZ_64K. + */ + + target = SZ_64K; + n_objects = div64_u64(total, target); + + while (n_objects--) { + struct list_head *list; + + if (n_objects % 2) + list = &holes; + else + list = &objects; + + obj = igt_object_create(mem, list, target, + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + } + + close_objects(mem, &holes); + + min = target; + target = total >> 1; + + /* Make sure we can still allocate all the fragmented space */ + obj = igt_object_create(mem, &objects, target, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_close_objects; + } + + igt_object_release(obj); + + /* + * Even though we have enough free space, we don't have a big enough + * contiguous block. Make sure that holds true. + */ + + do { + bool should_fail = target > min; + + obj = igt_object_create(mem, &objects, target, + I915_BO_ALLOC_CONTIGUOUS); + if (should_fail != IS_ERR(obj)) { + pr_err("%s target allocation(%llx) mismatch\n", + __func__, target); + err = -EINVAL; + goto err_close_objects; + } + + target >>= 1; + } while (target >= mem->mm.chunk_size); + +err_close_objects: + list_splice_tail(&holes, &objects); + close_objects(mem, &objects); + return err; +} + +int intel_memory_region_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_fill), + SUBTEST(igt_mock_contiguous), + }; + struct intel_memory_region *mem; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); + if (IS_ERR(mem)) { + pr_err("failed to create memory region\n"); + err = PTR_ERR(mem); + goto out_unref; + } + + err = i915_subtests(tests, mem); + + intel_memory_region_put(mem); +out_unref: + drm_dev_put(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 86815c6072a1..0ffb141eb988 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -67,6 +67,7 @@ static int intel_shadow_table_check(void) } reg_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, + { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, }; const i915_reg_t *reg; unsigned int i, j; @@ -101,6 +102,7 @@ int intel_uncore_mock_selftests(void) { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, { __gen11_fw_ranges, ARRAY_SIZE(__gen11_fw_ranges), true }, + { __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true }, }; int err, i; diff --git 
a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 01a89c071bf5..cb8c3a501cc7 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -26,12 +26,14 @@ #include <linux/pm_runtime.h> #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" #include "mock_request.h" #include "mock_gem_device.h" #include "mock_gtt.h" #include "mock_uncore.h" +#include "mock_region.h" #include "gem/selftests/mock_context.h" #include "gem/selftests/mock_gem_object.h" @@ -41,12 +43,11 @@ void mock_device_flush(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - do { for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (i915_retire_requests(i915)); + } while (intel_gt_retire_requests_timeout(&i915->gt, + MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) @@ -55,31 +56,23 @@ static void mock_device_release(struct drm_device *dev) struct intel_engine_cs *engine; enum intel_engine_id id; - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); - mutex_lock(&i915->drm.struct_mutex); for_each_engine(engine, i915, id) mock_engine_free(engine); - i915_gem_contexts_fini(i915); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_driver_release__contexts(i915); intel_timelines_fini(i915); drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); - mutex_unlock(&i915->drm.struct_mutex); - destroy_workqueue(i915->wq); - i915_gemfs_fini(i915); + i915_gem_cleanup_memory_regions(i915); drm_mode_config_cleanup(&i915->drm); @@ -103,14 +96,6 @@ static void release_dev(struct device *dev) kfree(pdev); } -static void mock_retire_work_handler(struct work_struct *work) -{ -} - -static void mock_idle_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -178,10 +163,14 @@ struct drm_i915_private *mock_gem_device(void) I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M; - mock_uncore_init(&i915->uncore); + mkwrite_device_info(i915)->memory_regions = REGION_SMEM; + + mock_uncore_init(&i915->uncore, i915); + i915_gem_init__mm(i915); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ + i915->gt.awake = -ENODEV; i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) @@ -189,15 +178,8 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); - - i915->gt.awake = true; - intel_timelines_init(i915); - mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915, &i915->ggtt); mkwrite_device_info(i915)->engine_mask = BIT(0); @@ -214,18 +196,18 @@ struct drm_i915_private *mock_gem_device(void) goto err_context; intel_engines_driver_register(i915); - mutex_unlock(&i915->drm.struct_mutex); - WARN_ON(i915_gemfs_init(i915)); + err = i915_gem_init_memory_regions(i915); + if (err) + goto err_context; return i915; err_context: - i915_gem_contexts_fini(i915); + i915_gem_driver_release__contexts(i915); err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); 
intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e62a67e0f79c..173f2d4dbd14 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -43,7 +43,7 @@ static int mock_bind_ppgtt(struct i915_vma *vma, u32 flags) { GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); - vma->flags |= I915_VMA_LOCAL_BIND; + set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma)); return 0; } @@ -86,7 +86,7 @@ static int mock_bind_ggtt(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { - vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags); return 0; } @@ -118,7 +118,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt) i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); - intel_gt_init_hw(i915); + intel_gt_init_hw_early(i915); } void mock_fini_ggtt(struct i915_ggtt *ggtt) diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c new file mode 100644 index 000000000000..7b0c99ddc2d5 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "gem/i915_gem_region.h" +#include "intel_memory_region.h" + +#include "mock_region.h" + +static const struct drm_i915_gem_object_ops mock_region_obj_ops = { + .get_pages = i915_gem_object_get_pages_buddy, + .put_pages = i915_gem_object_put_pages_buddy, + .release = i915_gem_object_release_memory_region, +}; + +static struct drm_i915_gem_object * +mock_object_create(struct intel_memory_region *mem, + resource_size_t size, + unsigned int flags) +{ + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + + if (size > BIT(mem->mm.max_order) * mem->mm.chunk_size) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &mock_region_obj_ops); + + obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE); + + i915_gem_object_init_memory_region(obj, mem, flags); + + return obj; +} + +static const struct intel_memory_region_ops mock_region_ops = { + .init = intel_memory_region_init_buddy, + .release = intel_memory_region_release_buddy, + .create_object = mock_object_create, +}; + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start) +{ + return intel_memory_region_create(i915, start, size, min_page_size, + io_start, &mock_region_ops); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h new file mode 100644 index 000000000000..24608089d833 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_region.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __MOCK_REGION_H +#define __MOCK_REGION_H + +struct intel_memory_region * +mock_region_create(struct drm_i915_private *i915, + resource_size_t start, + resource_size_t size, + resource_size_t min_page_size, + resource_size_t io_start); + +#endif /* !__MOCK_REGION_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c 
b/drivers/gpu/drm/i915/selftests/mock_uncore.c index 49585f16d4a2..ca57e4008701 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.c +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c @@ -39,8 +39,11 @@ __nop_read(16) __nop_read(32) __nop_read(64) -void mock_uncore_init(struct intel_uncore *uncore) +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915) { + intel_uncore_init_early(uncore, i915); + ASSIGN_RAW_WRITE_MMIO_VFUNCS(uncore, nop); ASSIGN_RAW_READ_MMIO_VFUNCS(uncore, nop); } diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.h b/drivers/gpu/drm/i915/selftests/mock_uncore.h index dacb36b5ffcd..8a2cc553f466 100644 --- a/drivers/gpu/drm/i915/selftests/mock_uncore.h +++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h @@ -25,6 +25,7 @@ #ifndef __MOCK_UNCORE_H #define __MOCK_UNCORE_H -void mock_uncore_init(struct intel_uncore *uncore); +void mock_uncore_init(struct intel_uncore *uncore, + struct drm_i915_private *i915); #endif /* !__MOCK_UNCORE_H */ diff --git a/drivers/gpu/drm/mediatek/Makefile b/drivers/gpu/drm/mediatek/Makefile index 82ae49c64221..8067a4be8311 100644 --- a/drivers/gpu/drm/mediatek/Makefile +++ b/drivers/gpu/drm/mediatek/Makefile @@ -12,6 +12,8 @@ mediatek-drm-y := mtk_disp_color.o \ mtk_drm_plane.o \ mtk_dsi.o \ mtk_mipi_tx.o \ + mtk_mt8173_mipi_tx.o \ + mtk_mt8183_mipi_tx.o \ mtk_dpi.o obj-$(CONFIG_DRM_MEDIATEK) += mediatek-drm.o diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 21851756c579..14878ebf59d7 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -19,6 +19,8 @@ #define DISP_REG_OVL_EN 0x000c #define DISP_REG_OVL_RST 0x0014 #define DISP_REG_OVL_ROI_SIZE 0x0020 +#define DISP_REG_OVL_DATAPATH_CON 0x0024 +#define OVL_BGCLR_SEL_IN BIT(2) #define DISP_REG_OVL_ROI_BGCLR 0x0028 #define DISP_REG_OVL_SRC_CON 0x002c #define DISP_REG_OVL_CON(n) (0x0030 + 0x20 * (n)) @@ -31,7 +33,9 @@ #define DISP_REG_OVL_ADDR_MT8173 0x0f40 #define DISP_REG_OVL_ADDR(ovl, n) ((ovl)->data->addr + 0x20 * (n)) -#define OVL_RDMA_MEM_GMC 0x40402020 +#define GMC_THRESHOLD_BITS 16 +#define GMC_THRESHOLD_HIGH ((1 << GMC_THRESHOLD_BITS) / 4) +#define GMC_THRESHOLD_LOW ((1 << GMC_THRESHOLD_BITS) / 8) #define OVL_CON_BYTE_SWAP BIT(24) #define OVL_CON_MTX_YUV_TO_RGB (6 << 16) @@ -49,6 +53,8 @@ struct mtk_disp_ovl_data { unsigned int addr; + unsigned int gmc_bits; + unsigned int layer_nr; bool fmt_rgb565_is_0; }; @@ -126,15 +132,31 @@ static void mtk_ovl_config(struct mtk_ddp_comp *comp, unsigned int w, static unsigned int mtk_ovl_layer_nr(struct mtk_ddp_comp *comp) { - return 4; + struct mtk_disp_ovl *ovl = comp_to_ovl(comp); + + return ovl->data->layer_nr; } static void mtk_ovl_layer_on(struct mtk_ddp_comp *comp, unsigned int idx) { unsigned int reg; + unsigned int gmc_thrshd_l; + unsigned int gmc_thrshd_h; + unsigned int gmc_value; + struct mtk_disp_ovl *ovl = comp_to_ovl(comp); writel(0x1, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx)); - writel(OVL_RDMA_MEM_GMC, comp->regs + DISP_REG_OVL_RDMA_GMC(idx)); + + gmc_thrshd_l = GMC_THRESHOLD_LOW >> + (GMC_THRESHOLD_BITS - ovl->data->gmc_bits); + gmc_thrshd_h = GMC_THRESHOLD_HIGH >> + (GMC_THRESHOLD_BITS - ovl->data->gmc_bits); + if (ovl->data->gmc_bits == 10) + gmc_value = gmc_thrshd_h | gmc_thrshd_h << 16; + else + gmc_value = gmc_thrshd_l | gmc_thrshd_l << 8 | + gmc_thrshd_h << 16 | gmc_thrshd_h << 24; + writel(gmc_value, comp->regs + DISP_REG_OVL_RDMA_GMC(idx)); reg = readl(comp->regs + DISP_REG_OVL_SRC_CON); reg = 
reg | BIT(idx); @@ -217,6 +239,24 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, mtk_ovl_layer_on(comp, idx); } +static void mtk_ovl_bgclr_in_on(struct mtk_ddp_comp *comp) +{ + unsigned int reg; + + reg = readl(comp->regs + DISP_REG_OVL_DATAPATH_CON); + reg = reg | OVL_BGCLR_SEL_IN; + writel(reg, comp->regs + DISP_REG_OVL_DATAPATH_CON); +} + +static void mtk_ovl_bgclr_in_off(struct mtk_ddp_comp *comp) +{ + unsigned int reg; + + reg = readl(comp->regs + DISP_REG_OVL_DATAPATH_CON); + reg = reg & ~OVL_BGCLR_SEL_IN; + writel(reg, comp->regs + DISP_REG_OVL_DATAPATH_CON); +} + static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .config = mtk_ovl_config, .start = mtk_ovl_start, @@ -227,6 +267,8 @@ static const struct mtk_ddp_comp_funcs mtk_disp_ovl_funcs = { .layer_on = mtk_ovl_layer_on, .layer_off = mtk_ovl_layer_off, .layer_config = mtk_ovl_layer_config, + .bgclr_in_on = mtk_ovl_bgclr_in_on, + .bgclr_in_off = mtk_ovl_bgclr_in_off, }; static int mtk_disp_ovl_bind(struct device *dev, struct device *master, @@ -276,7 +318,12 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) if (irq < 0) return irq; - comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DISP_OVL); + priv->data = of_device_get_match_data(dev); + + comp_id = mtk_ddp_comp_get_id(dev->of_node, + priv->data->layer_nr == 4 ? + MTK_DISP_OVL : + MTK_DISP_OVL_2L); if (comp_id < 0) { dev_err(dev, "Failed to identify by alias: %d\n", comp_id); return comp_id; @@ -289,8 +336,6 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) return ret; } - priv->data = of_device_get_match_data(dev); - platform_set_drvdata(pdev, priv); ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler, @@ -316,11 +361,15 @@ static int mtk_disp_ovl_remove(struct platform_device *pdev) static const struct mtk_disp_ovl_data mt2701_ovl_driver_data = { .addr = DISP_REG_OVL_ADDR_MT2701, + .gmc_bits = 8, + .layer_nr = 4, .fmt_rgb565_is_0 = false, }; static const struct mtk_disp_ovl_data mt8173_ovl_driver_data = { .addr = DISP_REG_OVL_ADDR_MT8173, + .gmc_bits = 8, + .layer_nr = 4, .fmt_rgb565_is_0 = true, }; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 34a731755791..b841d3706d8b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -272,6 +272,9 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[i]; + if (i == 1) + mtk_ddp_comp_bgclr_in_on(comp); + mtk_ddp_comp_config(comp, width, height, vrefresh, bpc); mtk_ddp_comp_start(comp); } @@ -280,9 +283,18 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) for (i = 0; i < mtk_crtc->layer_nr; i++) { struct drm_plane *plane = &mtk_crtc->planes[i]; struct mtk_plane_state *plane_state; + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; + unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); + unsigned int local_layer; plane_state = to_mtk_plane_state(plane->state); - mtk_ddp_comp_layer_config(mtk_crtc->ddp_comp[0], i, + + if (i >= comp_layer_nr) { + comp = mtk_crtc->ddp_comp[1]; + local_layer = i - comp_layer_nr; + } else + local_layer = i; + mtk_ddp_comp_layer_config(comp, local_layer, plane_state); } @@ -301,8 +313,12 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) int i; DRM_DEBUG_DRIVER("%s\n", __func__); - for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) { 
mtk_ddp_comp_stop(mtk_crtc->ddp_comp[i]); + if (i == 1) + mtk_ddp_comp_bgclr_in_off(mtk_crtc->ddp_comp[i]); + } + for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) mtk_disp_mutex_remove_comp(mtk_crtc->mutex, mtk_crtc->ddp_comp[i]->id); @@ -327,6 +343,8 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state); struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[0]; unsigned int i; + unsigned int comp_layer_nr = mtk_ddp_comp_layer_nr(comp); + unsigned int local_layer; /* * TODO: instead of updating the registers here, we should prepare @@ -349,7 +367,14 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc) plane_state = to_mtk_plane_state(plane->state); if (plane_state->pending.config) { - mtk_ddp_comp_layer_config(comp, i, plane_state); + if (i >= comp_layer_nr) { + comp = mtk_crtc->ddp_comp[1]; + local_layer = i - comp_layer_nr; + } else + local_layer = i; + + mtk_ddp_comp_layer_config(comp, local_layer, + plane_state); plane_state->pending.config = false; } } @@ -582,6 +607,12 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } mtk_crtc->layer_nr = mtk_ddp_comp_layer_nr(mtk_crtc->ddp_comp[0]); + if (mtk_crtc->ddp_comp_nr > 1) { + struct mtk_ddp_comp *comp = mtk_crtc->ddp_comp[1]; + + if (comp->funcs->bgclr_in_on) + mtk_crtc->layer_nr += mtk_ddp_comp_layer_nr(comp); + } mtk_crtc->planes = devm_kcalloc(dev, mtk_crtc->layer_nr, sizeof(struct drm_plane), GFP_KERNEL); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c index 8106a71a7404..13035c906035 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c @@ -33,12 +33,15 @@ #define DISP_REG_CONFIG_DSI_SEL 0x050 #define DISP_REG_CONFIG_DPI_SEL 0x064 -#define DISP_REG_MUTEX_EN(n) (0x20 + 0x20 * (n)) -#define DISP_REG_MUTEX(n) (0x24 + 0x20 * (n)) -#define DISP_REG_MUTEX_RST(n) (0x28 + 0x20 * (n)) -#define DISP_REG_MUTEX_MOD(n) (0x2c + 0x20 * (n)) -#define DISP_REG_MUTEX_SOF(n) (0x30 + 0x20 * (n)) -#define DISP_REG_MUTEX_MOD2(n) (0x34 + 0x20 * (n)) +#define MT2701_DISP_MUTEX0_MOD0 0x2c +#define MT2701_DISP_MUTEX0_SOF0 0x30 + +#define DISP_REG_MUTEX_EN(n) (0x20 + 0x20 * (n)) +#define DISP_REG_MUTEX(n) (0x24 + 0x20 * (n)) +#define DISP_REG_MUTEX_RST(n) (0x28 + 0x20 * (n)) +#define DISP_REG_MUTEX_MOD(mutex_mod_reg, n) (mutex_mod_reg + 0x20 * (n)) +#define DISP_REG_MUTEX_SOF(mutex_sof_reg, n) (mutex_sof_reg + 0x20 * (n)) +#define DISP_REG_MUTEX_MOD2(n) (0x34 + 0x20 * (n)) #define INT_MUTEX BIT(1) @@ -139,12 +142,30 @@ struct mtk_disp_mutex { bool claimed; }; +enum mtk_ddp_mutex_sof_id { + DDP_MUTEX_SOF_SINGLE_MODE, + DDP_MUTEX_SOF_DSI0, + DDP_MUTEX_SOF_DSI1, + DDP_MUTEX_SOF_DPI0, + DDP_MUTEX_SOF_DPI1, + DDP_MUTEX_SOF_DSI2, + DDP_MUTEX_SOF_DSI3, +}; + +struct mtk_ddp_data { + const unsigned int *mutex_mod; + const unsigned int *mutex_sof; + const unsigned int mutex_mod_reg; + const unsigned int mutex_sof_reg; + const bool no_clk; +}; + struct mtk_ddp { struct device *dev; struct clk *clk; void __iomem *regs; struct mtk_disp_mutex mutex[10]; - const unsigned int *mutex_mod; + const struct mtk_ddp_data *data; }; static const unsigned int mt2701_mutex_mod[DDP_COMPONENT_ID_MAX] = { @@ -194,6 +215,37 @@ static const unsigned int mt8173_mutex_mod[DDP_COMPONENT_ID_MAX] = { [DDP_COMPONENT_WDMA1] = MT8173_MUTEX_MOD_DISP_WDMA1, }; +static const unsigned int mt2712_mutex_sof[DDP_MUTEX_SOF_DSI3 + 1] = { + [DDP_MUTEX_SOF_SINGLE_MODE] = MUTEX_SOF_SINGLE_MODE, + [DDP_MUTEX_SOF_DSI0] = MUTEX_SOF_DSI0, + 
[DDP_MUTEX_SOF_DSI1] = MUTEX_SOF_DSI1, + [DDP_MUTEX_SOF_DPI0] = MUTEX_SOF_DPI0, + [DDP_MUTEX_SOF_DPI1] = MUTEX_SOF_DPI1, + [DDP_MUTEX_SOF_DSI2] = MUTEX_SOF_DSI2, + [DDP_MUTEX_SOF_DSI3] = MUTEX_SOF_DSI3, +}; + +static const struct mtk_ddp_data mt2701_ddp_driver_data = { + .mutex_mod = mt2701_mutex_mod, + .mutex_sof = mt2712_mutex_sof, + .mutex_mod_reg = MT2701_DISP_MUTEX0_MOD0, + .mutex_sof_reg = MT2701_DISP_MUTEX0_SOF0, +}; + +static const struct mtk_ddp_data mt2712_ddp_driver_data = { + .mutex_mod = mt2712_mutex_mod, + .mutex_sof = mt2712_mutex_sof, + .mutex_mod_reg = MT2701_DISP_MUTEX0_MOD0, + .mutex_sof_reg = MT2701_DISP_MUTEX0_SOF0, +}; + +static const struct mtk_ddp_data mt8173_ddp_driver_data = { + .mutex_mod = mt8173_mutex_mod, + .mutex_sof = mt2712_mutex_sof, + .mutex_mod_reg = MT2701_DISP_MUTEX0_MOD0, + .mutex_sof_reg = MT2701_DISP_MUTEX0_SOF0, +}; + static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur, enum mtk_ddp_comp_id next, unsigned int *addr) @@ -432,45 +484,49 @@ void mtk_disp_mutex_add_comp(struct mtk_disp_mutex *mutex, struct mtk_ddp *ddp = container_of(mutex, struct mtk_ddp, mutex[mutex->id]); unsigned int reg; + unsigned int sof_id; unsigned int offset; WARN_ON(&ddp->mutex[mutex->id] != mutex); switch (id) { case DDP_COMPONENT_DSI0: - reg = MUTEX_SOF_DSI0; + sof_id = DDP_MUTEX_SOF_DSI0; break; case DDP_COMPONENT_DSI1: - reg = MUTEX_SOF_DSI0; + sof_id = DDP_MUTEX_SOF_DSI0; break; case DDP_COMPONENT_DSI2: - reg = MUTEX_SOF_DSI2; + sof_id = DDP_MUTEX_SOF_DSI2; break; case DDP_COMPONENT_DSI3: - reg = MUTEX_SOF_DSI3; + sof_id = DDP_MUTEX_SOF_DSI3; break; case DDP_COMPONENT_DPI0: - reg = MUTEX_SOF_DPI0; + sof_id = DDP_MUTEX_SOF_DPI0; break; case DDP_COMPONENT_DPI1: - reg = MUTEX_SOF_DPI1; + sof_id = DDP_MUTEX_SOF_DPI1; break; default: - if (ddp->mutex_mod[id] < 32) { - offset = DISP_REG_MUTEX_MOD(mutex->id); + if (ddp->data->mutex_mod[id] < 32) { + offset = DISP_REG_MUTEX_MOD(ddp->data->mutex_mod_reg, + mutex->id); reg = readl_relaxed(ddp->regs + offset); - reg |= 1 << ddp->mutex_mod[id]; + reg |= 1 << ddp->data->mutex_mod[id]; writel_relaxed(reg, ddp->regs + offset); } else { offset = DISP_REG_MUTEX_MOD2(mutex->id); reg = readl_relaxed(ddp->regs + offset); - reg |= 1 << (ddp->mutex_mod[id] - 32); + reg |= 1 << (ddp->data->mutex_mod[id] - 32); writel_relaxed(reg, ddp->regs + offset); } return; } - writel_relaxed(reg, ddp->regs + DISP_REG_MUTEX_SOF(mutex->id)); + writel_relaxed(ddp->data->mutex_sof[sof_id], + ddp->regs + + DISP_REG_MUTEX_SOF(ddp->data->mutex_sof_reg, mutex->id)); } void mtk_disp_mutex_remove_comp(struct mtk_disp_mutex *mutex, @@ -491,18 +547,21 @@ void mtk_disp_mutex_remove_comp(struct mtk_disp_mutex *mutex, case DDP_COMPONENT_DPI0: case DDP_COMPONENT_DPI1: writel_relaxed(MUTEX_SOF_SINGLE_MODE, - ddp->regs + DISP_REG_MUTEX_SOF(mutex->id)); + ddp->regs + + DISP_REG_MUTEX_SOF(ddp->data->mutex_sof_reg, + mutex->id)); break; default: - if (ddp->mutex_mod[id] < 32) { - offset = DISP_REG_MUTEX_MOD(mutex->id); + if (ddp->data->mutex_mod[id] < 32) { + offset = DISP_REG_MUTEX_MOD(ddp->data->mutex_mod_reg, + mutex->id); reg = readl_relaxed(ddp->regs + offset); - reg &= ~(1 << ddp->mutex_mod[id]); + reg &= ~(1 << ddp->data->mutex_mod[id]); writel_relaxed(reg, ddp->regs + offset); } else { offset = DISP_REG_MUTEX_MOD2(mutex->id); reg = readl_relaxed(ddp->regs + offset); - reg &= ~(1 << (ddp->mutex_mod[id] - 32)); + reg &= ~(1 << (ddp->data->mutex_mod[id] - 32)); writel_relaxed(reg, ddp->regs + offset); } break; @@ -564,10 +623,14 @@ static int 
mtk_ddp_probe(struct platform_device *pdev) for (i = 0; i < 10; i++) ddp->mutex[i].id = i; - ddp->clk = devm_clk_get(dev, NULL); - if (IS_ERR(ddp->clk)) { - dev_err(dev, "Failed to get clock\n"); - return PTR_ERR(ddp->clk); + ddp->data = of_device_get_match_data(dev); + + if (!ddp->data->no_clk) { + ddp->clk = devm_clk_get(dev, NULL); + if (IS_ERR(ddp->clk)) { + dev_err(dev, "Failed to get clock\n"); + return PTR_ERR(ddp->clk); + } } regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -577,8 +640,6 @@ static int mtk_ddp_probe(struct platform_device *pdev) return PTR_ERR(ddp->regs); } - ddp->mutex_mod = of_device_get_match_data(dev); - platform_set_drvdata(pdev, ddp); return 0; @@ -590,9 +651,12 @@ static int mtk_ddp_remove(struct platform_device *pdev) } static const struct of_device_id ddp_driver_dt_match[] = { - { .compatible = "mediatek,mt2701-disp-mutex", .data = mt2701_mutex_mod}, - { .compatible = "mediatek,mt2712-disp-mutex", .data = mt2712_mutex_mod}, - { .compatible = "mediatek,mt8173-disp-mutex", .data = mt8173_mutex_mod}, + { .compatible = "mediatek,mt2701-disp-mutex", + .data = &mt2701_ddp_driver_data}, + { .compatible = "mediatek,mt2712-disp-mutex", + .data = &mt2712_ddp_driver_data}, + { .compatible = "mediatek,mt8173-disp-mutex", + .data = &mt8173_ddp_driver_data}, {}, }; MODULE_DEVICE_TABLE(of, ddp_driver_dt_match); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index efa85973e46b..7f21307cda75 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -33,6 +33,18 @@ #define DISP_AAL_EN 0x0000 #define DISP_AAL_SIZE 0x0030 +#define DISP_CCORR_EN 0x0000 +#define CCORR_EN BIT(0) +#define DISP_CCORR_CFG 0x0020 +#define CCORR_RELAY_MODE BIT(0) +#define DISP_CCORR_SIZE 0x0030 + +#define DISP_DITHER_EN 0x0000 +#define DITHER_EN BIT(0) +#define DISP_DITHER_CFG 0x0020 +#define DITHER_RELAY_MODE BIT(0) +#define DISP_DITHER_SIZE 0x0030 + #define DISP_GAMMA_EN 0x0000 #define DISP_GAMMA_CFG 0x0020 #define DISP_GAMMA_SIZE 0x0030 @@ -123,6 +135,42 @@ static void mtk_aal_stop(struct mtk_ddp_comp *comp) writel_relaxed(0x0, comp->regs + DISP_AAL_EN); } +static void mtk_ccorr_config(struct mtk_ddp_comp *comp, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc) +{ + writel(h << 16 | w, comp->regs + DISP_CCORR_SIZE); + writel(CCORR_RELAY_MODE, comp->regs + DISP_CCORR_CFG); +} + +static void mtk_ccorr_start(struct mtk_ddp_comp *comp) +{ + writel(CCORR_EN, comp->regs + DISP_CCORR_EN); +} + +static void mtk_ccorr_stop(struct mtk_ddp_comp *comp) +{ + writel_relaxed(0x0, comp->regs + DISP_CCORR_EN); +} + +static void mtk_dither_config(struct mtk_ddp_comp *comp, unsigned int w, + unsigned int h, unsigned int vrefresh, + unsigned int bpc) +{ + writel(h << 16 | w, comp->regs + DISP_DITHER_SIZE); + writel(DITHER_RELAY_MODE, comp->regs + DISP_DITHER_CFG); +} + +static void mtk_dither_start(struct mtk_ddp_comp *comp) +{ + writel(DITHER_EN, comp->regs + DISP_DITHER_EN); +} + +static void mtk_dither_stop(struct mtk_ddp_comp *comp) +{ + writel_relaxed(0x0, comp->regs + DISP_DITHER_EN); +} + static void mtk_gamma_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int h, unsigned int vrefresh, unsigned int bpc) @@ -171,6 +219,18 @@ static const struct mtk_ddp_comp_funcs ddp_aal = { .stop = mtk_aal_stop, }; +static const struct mtk_ddp_comp_funcs ddp_ccorr = { + .config = mtk_ccorr_config, + .start = mtk_ccorr_start, + .stop = mtk_ccorr_stop, +}; + +static const 
struct mtk_ddp_comp_funcs ddp_dither = { + .config = mtk_dither_config, + .start = mtk_dither_start, + .stop = mtk_dither_stop, +}; + static const struct mtk_ddp_comp_funcs ddp_gamma = { .gamma_set = mtk_gamma_set, .config = mtk_gamma_config, @@ -189,11 +249,14 @@ static const struct mtk_ddp_comp_funcs ddp_ufoe = { static const char * const mtk_ddp_comp_stem[MTK_DDP_COMP_TYPE_MAX] = { [MTK_DISP_OVL] = "ovl", + [MTK_DISP_OVL_2L] = "ovl_2l", [MTK_DISP_RDMA] = "rdma", [MTK_DISP_WDMA] = "wdma", [MTK_DISP_COLOR] = "color", + [MTK_DISP_CCORR] = "ccorr", [MTK_DISP_AAL] = "aal", [MTK_DISP_GAMMA] = "gamma", + [MTK_DISP_DITHER] = "dither", [MTK_DISP_UFOE] = "ufoe", [MTK_DSI] = "dsi", [MTK_DPI] = "dpi", @@ -213,8 +276,10 @@ static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = { [DDP_COMPONENT_AAL0] = { MTK_DISP_AAL, 0, &ddp_aal }, [DDP_COMPONENT_AAL1] = { MTK_DISP_AAL, 1, &ddp_aal }, [DDP_COMPONENT_BLS] = { MTK_DISP_BLS, 0, NULL }, + [DDP_COMPONENT_CCORR] = { MTK_DISP_CCORR, 0, &ddp_ccorr }, [DDP_COMPONENT_COLOR0] = { MTK_DISP_COLOR, 0, NULL }, [DDP_COMPONENT_COLOR1] = { MTK_DISP_COLOR, 1, NULL }, + [DDP_COMPONENT_DITHER] = { MTK_DISP_DITHER, 0, &ddp_dither }, [DDP_COMPONENT_DPI0] = { MTK_DPI, 0, NULL }, [DDP_COMPONENT_DPI1] = { MTK_DPI, 1, NULL }, [DDP_COMPONENT_DSI0] = { MTK_DSI, 0, NULL }, @@ -226,6 +291,8 @@ static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = { [DDP_COMPONENT_OD1] = { MTK_DISP_OD, 1, &ddp_od }, [DDP_COMPONENT_OVL0] = { MTK_DISP_OVL, 0, NULL }, [DDP_COMPONENT_OVL1] = { MTK_DISP_OVL, 1, NULL }, + [DDP_COMPONENT_OVL_2L0] = { MTK_DISP_OVL_2L, 0, NULL }, + [DDP_COMPONENT_OVL_2L1] = { MTK_DISP_OVL_2L, 1, NULL }, [DDP_COMPONENT_PWM0] = { MTK_DISP_PWM, 0, NULL }, [DDP_COMPONENT_PWM1] = { MTK_DISP_PWM, 1, NULL }, [DDP_COMPONENT_PWM2] = { MTK_DISP_PWM, 2, NULL }, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h index 0ad287f427cc..26441f4d1ad3 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h @@ -17,9 +17,12 @@ struct drm_crtc_state; enum mtk_ddp_comp_type { MTK_DISP_OVL, + MTK_DISP_OVL_2L, MTK_DISP_RDMA, MTK_DISP_WDMA, MTK_DISP_COLOR, + MTK_DISP_CCORR, + MTK_DISP_DITHER, MTK_DISP_AAL, MTK_DISP_GAMMA, MTK_DISP_UFOE, @@ -36,8 +39,10 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_AAL0, DDP_COMPONENT_AAL1, DDP_COMPONENT_BLS, + DDP_COMPONENT_CCORR, DDP_COMPONENT_COLOR0, DDP_COMPONENT_COLOR1, + DDP_COMPONENT_DITHER, DDP_COMPONENT_DPI0, DDP_COMPONENT_DPI1, DDP_COMPONENT_DSI0, @@ -48,6 +53,8 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_OD0, DDP_COMPONENT_OD1, DDP_COMPONENT_OVL0, + DDP_COMPONENT_OVL_2L0, + DDP_COMPONENT_OVL_2L1, DDP_COMPONENT_OVL1, DDP_COMPONENT_PWM0, DDP_COMPONENT_PWM1, @@ -77,6 +84,8 @@ struct mtk_ddp_comp_funcs { struct mtk_plane_state *state); void (*gamma_set)(struct mtk_ddp_comp *comp, struct drm_crtc_state *state); + void (*bgclr_in_on)(struct mtk_ddp_comp *comp); + void (*bgclr_in_off)(struct mtk_ddp_comp *comp); }; struct mtk_ddp_comp { @@ -158,6 +167,18 @@ static inline void mtk_ddp_gamma_set(struct mtk_ddp_comp *comp, comp->funcs->gamma_set(comp, state); } +static inline void mtk_ddp_comp_bgclr_in_on(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->bgclr_in_on) + comp->funcs->bgclr_in_on(comp); +} + +static inline void mtk_ddp_comp_bgclr_in_off(struct mtk_ddp_comp *comp) +{ + if (comp->funcs && comp->funcs->bgclr_in_off) + comp->funcs->bgclr_in_off(comp); +} + int mtk_ddp_comp_get_id(struct device_node 
*node, enum mtk_ddp_comp_type comp_type); int mtk_ddp_comp_init(struct device *dev, struct device_node *comp_node, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 352b81a7a670..84d14213d992 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -547,6 +547,7 @@ static int mtk_drm_probe(struct platform_device *pdev) */ if (comp_type == MTK_DISP_COLOR || comp_type == MTK_DISP_OVL || + comp_type == MTK_DISP_OVL_2L || comp_type == MTK_DISP_RDMA || comp_type == MTK_DSI || comp_type == MTK_DPI) { @@ -669,8 +670,8 @@ static struct platform_driver * const mtk_drm_drivers[] = { &mtk_disp_rdma_driver, &mtk_dpi_driver, &mtk_drm_platform_driver, - &mtk_dsi_driver, &mtk_mipi_tx_driver, + &mtk_dsi_driver, }; static int __init mtk_drm_init(void) diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index a413f5ff442d..e9931bbbe846 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -40,6 +40,7 @@ #define DSI_CON_CTRL 0x10 #define DSI_RESET BIT(0) #define DSI_EN BIT(1) +#define DPHY_RESET BIT(2) #define DSI_MODE_CTRL 0x14 #define MODE (3) @@ -73,6 +74,7 @@ #define DSI_VBP_NL 0x24 #define DSI_VFP_NL 0x28 #define DSI_VACT_NL 0x2C +#define DSI_SIZE_CON 0x38 #define DSI_HSA_WC 0x50 #define DSI_HBP_WC 0x54 #define DSI_HFP_WC 0x58 @@ -126,7 +128,10 @@ #define VM_CMD_EN BIT(0) #define TS_VFP_EN BIT(5) -#define DSI_CMDQ0 0x180 +#define DSI_SHADOW_DEBUG 0x190U +#define FORCE_COMMIT BIT(0) +#define BYPASS_SHADOW BIT(1) + #define CONFIG (0xff << 0) #define SHORT_PACKET 0 #define LONG_PACKET 2 @@ -135,12 +140,6 @@ #define DATA_0 (0xff << 16) #define DATA_1 (0xff << 24) -#define T_LPX 5 -#define T_HS_PREP 6 -#define T_HS_TRAIL 8 -#define T_HS_EXIT 7 -#define T_HS_ZERO 10 - #define NS_TO_CYCLE(n, c) ((n) / (c) + (((n) % (c)) ? 
1 : 0)) #define MTK_DSI_HOST_IS_READ(type) \ @@ -149,8 +148,33 @@ (type == MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM) || \ (type == MIPI_DSI_DCS_READ)) +struct mtk_phy_timing { + u32 lpx; + u32 da_hs_prepare; + u32 da_hs_zero; + u32 da_hs_trail; + + u32 ta_go; + u32 ta_sure; + u32 ta_get; + u32 da_hs_exit; + + u32 clk_hs_zero; + u32 clk_hs_trail; + + u32 clk_hs_prepare; + u32 clk_hs_post; + u32 clk_hs_exit; +}; + struct phy; +struct mtk_dsi_driver_data { + const u32 reg_cmdq_off; + bool has_shadow_ctl; + bool has_size_ctl; +}; + struct mtk_dsi { struct mtk_ddp_comp ddp_comp; struct device *dev; @@ -173,10 +197,12 @@ struct mtk_dsi { enum mipi_dsi_pixel_format format; unsigned int lanes; struct videomode vm; + struct mtk_phy_timing phy_timing; int refcount; bool enabled; u32 irq_data; wait_queue_head_t irq_wait_queue; + const struct mtk_dsi_driver_data *driver_data; }; static inline struct mtk_dsi *encoder_to_dsi(struct drm_encoder *e) @@ -205,17 +231,36 @@ static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi) { u32 timcon0, timcon1, timcon2, timcon3; u32 ui, cycle_time; + struct mtk_phy_timing *timing = &dsi->phy_timing; + + ui = DIV_ROUND_UP(1000000000, dsi->data_rate); + cycle_time = div_u64(8000000000ULL, dsi->data_rate); + + timing->lpx = NS_TO_CYCLE(60, cycle_time); + timing->da_hs_prepare = NS_TO_CYCLE(50 + 5 * ui, cycle_time); + timing->da_hs_zero = NS_TO_CYCLE(110 + 6 * ui, cycle_time); + timing->da_hs_trail = NS_TO_CYCLE(77 + 4 * ui, cycle_time); + + timing->ta_go = 4 * timing->lpx; + timing->ta_sure = 3 * timing->lpx / 2; + timing->ta_get = 5 * timing->lpx; + timing->da_hs_exit = 2 * timing->lpx; - ui = 1000 / dsi->data_rate + 0x01; - cycle_time = 8000 / dsi->data_rate + 0x01; + timing->clk_hs_zero = NS_TO_CYCLE(336, cycle_time); + timing->clk_hs_trail = NS_TO_CYCLE(100, cycle_time) + 10; - timcon0 = T_LPX | T_HS_PREP << 8 | T_HS_ZERO << 16 | T_HS_TRAIL << 24; - timcon1 = 4 * T_LPX | (3 * T_LPX / 2) << 8 | 5 * T_LPX << 16 | - T_HS_EXIT << 24; - timcon2 = ((NS_TO_CYCLE(0x64, cycle_time) + 0xa) << 24) | - (NS_TO_CYCLE(0x150, cycle_time) << 16); - timcon3 = NS_TO_CYCLE(0x40, cycle_time) | (2 * T_LPX) << 16 | - NS_TO_CYCLE(80 + 52 * ui, cycle_time) << 8; + timing->clk_hs_prepare = NS_TO_CYCLE(64, cycle_time); + timing->clk_hs_post = NS_TO_CYCLE(80 + 52 * ui, cycle_time); + timing->clk_hs_exit = 2 * timing->lpx; + + timcon0 = timing->lpx | timing->da_hs_prepare << 8 | + timing->da_hs_zero << 16 | timing->da_hs_trail << 24; + timcon1 = timing->ta_go | timing->ta_sure << 8 | + timing->ta_get << 16 | timing->da_hs_exit << 24; + timcon2 = 1 << 8 | timing->clk_hs_zero << 16 | + timing->clk_hs_trail << 24; + timcon3 = timing->clk_hs_prepare | timing->clk_hs_post << 8 | + timing->clk_hs_exit << 16; writel(timcon0, dsi->regs + DSI_PHY_TIMECON0); writel(timcon1, dsi->regs + DSI_PHY_TIMECON1); @@ -239,6 +284,12 @@ static void mtk_dsi_reset_engine(struct mtk_dsi *dsi) mtk_dsi_mask(dsi, DSI_CON_CTRL, DSI_RESET, 0); } +static void mtk_dsi_reset_dphy(struct mtk_dsi *dsi) +{ + mtk_dsi_mask(dsi, DSI_CON_CTRL, DPHY_RESET, DPHY_RESET); + mtk_dsi_mask(dsi, DSI_CON_CTRL, DPHY_RESET, 0); +} + static void mtk_dsi_clk_ulp_mode_enter(struct mtk_dsi *dsi) { mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_HS_TX_EN, 0); @@ -402,7 +453,8 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) u32 horizontal_sync_active_byte; u32 horizontal_backporch_byte; u32 horizontal_frontporch_byte; - u32 dsi_tmp_buf_bpp; + u32 dsi_tmp_buf_bpp, data_phy_cycles; + struct mtk_phy_timing *timing = &dsi->phy_timing; struct videomode *vm 
= &dsi->vm; @@ -416,6 +468,10 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) writel(vm->vfront_porch, dsi->regs + DSI_VFP_NL); writel(vm->vactive, dsi->regs + DSI_VACT_NL); + if (dsi->driver_data->has_size_ctl) + writel(vm->vactive << 16 | vm->hactive, + dsi->regs + DSI_SIZE_CON); + horizontal_sync_active_byte = (vm->hsync_len * dsi_tmp_buf_bpp - 10); if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) @@ -425,7 +481,34 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi) horizontal_backporch_byte = ((vm->hback_porch + vm->hsync_len) * dsi_tmp_buf_bpp - 10); - horizontal_frontporch_byte = (vm->hfront_porch * dsi_tmp_buf_bpp - 12); + data_phy_cycles = timing->lpx + timing->da_hs_prepare + + timing->da_hs_zero + timing->da_hs_exit + 2; + + if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) { + if (vm->hfront_porch * dsi_tmp_buf_bpp > + data_phy_cycles * dsi->lanes + 18) { + horizontal_frontporch_byte = vm->hfront_porch * + dsi_tmp_buf_bpp - + data_phy_cycles * + dsi->lanes - 18; + } else { + DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); + horizontal_frontporch_byte = vm->hfront_porch * + dsi_tmp_buf_bpp; + } + } else { + if (vm->hfront_porch * dsi_tmp_buf_bpp > + data_phy_cycles * dsi->lanes + 12) { + horizontal_frontporch_byte = vm->hfront_porch * + dsi_tmp_buf_bpp - + data_phy_cycles * + dsi->lanes - 12; + } else { + DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n"); + horizontal_frontporch_byte = vm->hfront_porch * + dsi_tmp_buf_bpp; + } + } writel(horizontal_sync_active_byte, dsi->regs + DSI_HSA_WC); writel(horizontal_backporch_byte, dsi->regs + DSI_HBP_WC); @@ -523,10 +606,9 @@ static s32 mtk_dsi_switch_to_cmd_mode(struct mtk_dsi *dsi, u8 irq_flag, u32 t) static int mtk_dsi_poweron(struct mtk_dsi *dsi) { - struct device *dev = dsi->dev; + struct device *dev = dsi->host.dev; int ret; - u64 pixel_clock, total_bits; - u32 htotal, htotal_bits, bit_per_pixel, overhead_cycles, overhead_bits; + u32 bit_per_pixel; if (++dsi->refcount != 1) return 0; @@ -545,24 +627,8 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) break; } - /** - * htotal_time = htotal * byte_per_pixel / num_lanes - * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit - * mipi_ratio = (htotal_time + overhead_time) / htotal_time - * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; - */ - pixel_clock = dsi->vm.pixelclock; - htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + - dsi->vm.hsync_len; - htotal_bits = htotal * bit_per_pixel; - - overhead_cycles = T_LPX + T_HS_PREP + T_HS_ZERO + T_HS_TRAIL + - T_HS_EXIT; - overhead_bits = overhead_cycles * dsi->lanes * 8; - total_bits = htotal_bits + overhead_bits; - - dsi->data_rate = DIV_ROUND_UP_ULL(pixel_clock * total_bits, - htotal * dsi->lanes); + dsi->data_rate = DIV_ROUND_UP_ULL(dsi->vm.pixelclock * bit_per_pixel, + dsi->lanes); ret = clk_set_rate(dsi->hs_clk, dsi->data_rate); if (ret < 0) { @@ -585,10 +651,17 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) } mtk_dsi_enable(dsi); + + if (dsi->driver_data->has_shadow_ctl) + writel(FORCE_COMMIT | BYPASS_SHADOW, + dsi->regs + DSI_SHADOW_DEBUG); + mtk_dsi_reset_engine(dsi); mtk_dsi_phy_timconfig(dsi); mtk_dsi_rxtx_control(dsi); + usleep_range(30, 100); + mtk_dsi_reset_dphy(dsi); mtk_dsi_ps_control_vact(dsi); mtk_dsi_set_vm_cmd(dsi); mtk_dsi_config_vdo_timing(dsi); @@ -939,6 +1012,7 @@ static void mtk_dsi_cmdq(struct mtk_dsi *dsi, const struct mipi_dsi_msg *msg) const char *tx_buf = msg->tx_buf; u8 config, cmdq_size, cmdq_off, type = 
msg->type; u32 reg_val, cmdq_mask, i; + u32 reg_cmdq_off = dsi->driver_data->reg_cmdq_off; if (MTK_DSI_HOST_IS_READ(type)) config = BTA; @@ -958,9 +1032,11 @@ static void mtk_dsi_cmdq(struct mtk_dsi *dsi, const struct mipi_dsi_msg *msg) } for (i = 0; i < msg->tx_len; i++) - writeb(tx_buf[i], dsi->regs + DSI_CMDQ0 + cmdq_off + i); + mtk_dsi_mask(dsi, (reg_cmdq_off + cmdq_off + i) & (~0x3U), + (0xffUL << (((i + cmdq_off) & 3U) * 8U)), + tx_buf[i] << (((i + cmdq_off) & 3U) * 8U)); - mtk_dsi_mask(dsi, DSI_CMDQ0, cmdq_mask, reg_val); + mtk_dsi_mask(dsi, reg_cmdq_off, cmdq_mask, reg_val); mtk_dsi_mask(dsi, DSI_CMDQ_SIZE, CMDQ_SIZE, cmdq_size); } @@ -1050,12 +1126,6 @@ static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) return ret; } - ret = mipi_dsi_host_register(&dsi->host); - if (ret < 0) { - dev_err(dev, "failed to register DSI host: %d\n", ret); - goto err_ddp_comp_unregister; - } - ret = mtk_dsi_create_conn_enc(drm, dsi); if (ret) { DRM_ERROR("Encoder create failed with %d\n", ret); @@ -1065,8 +1135,6 @@ static int mtk_dsi_bind(struct device *dev, struct device *master, void *data) return 0; err_unregister: - mipi_dsi_host_unregister(&dsi->host); -err_ddp_comp_unregister: mtk_ddp_comp_unregister(drm, &dsi->ddp_comp); return ret; } @@ -1078,7 +1146,6 @@ static void mtk_dsi_unbind(struct device *dev, struct device *master, struct mtk_dsi *dsi = dev_get_drvdata(dev); mtk_dsi_destroy_conn_enc(dsi); - mipi_dsi_host_unregister(&dsi->host); mtk_ddp_comp_unregister(drm, &dsi->ddp_comp); } @@ -1102,31 +1169,38 @@ static int mtk_dsi_probe(struct platform_device *pdev) dsi->host.ops = &mtk_dsi_ops; dsi->host.dev = dev; + ret = mipi_dsi_host_register(&dsi->host); + if (ret < 0) { + dev_err(dev, "failed to register DSI host: %d\n", ret); + return ret; + } ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, &dsi->panel, &dsi->bridge); if (ret) - return ret; + goto err_unregister_host; + + dsi->driver_data = of_device_get_match_data(dev); dsi->engine_clk = devm_clk_get(dev, "engine"); if (IS_ERR(dsi->engine_clk)) { ret = PTR_ERR(dsi->engine_clk); dev_err(dev, "Failed to get engine clock: %d\n", ret); - return ret; + goto err_unregister_host; } dsi->digital_clk = devm_clk_get(dev, "digital"); if (IS_ERR(dsi->digital_clk)) { ret = PTR_ERR(dsi->digital_clk); dev_err(dev, "Failed to get digital clock: %d\n", ret); - return ret; + goto err_unregister_host; } dsi->hs_clk = devm_clk_get(dev, "hs"); if (IS_ERR(dsi->hs_clk)) { ret = PTR_ERR(dsi->hs_clk); dev_err(dev, "Failed to get hs clock: %d\n", ret); - return ret; + goto err_unregister_host; } regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1134,33 +1208,35 @@ static int mtk_dsi_probe(struct platform_device *pdev) if (IS_ERR(dsi->regs)) { ret = PTR_ERR(dsi->regs); dev_err(dev, "Failed to ioremap memory: %d\n", ret); - return ret; + goto err_unregister_host; } dsi->phy = devm_phy_get(dev, "dphy"); if (IS_ERR(dsi->phy)) { ret = PTR_ERR(dsi->phy); dev_err(dev, "Failed to get MIPI-DPHY: %d\n", ret); - return ret; + goto err_unregister_host; } comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DSI); if (comp_id < 0) { dev_err(dev, "Failed to identify by alias: %d\n", comp_id); - return comp_id; + ret = comp_id; + goto err_unregister_host; } ret = mtk_ddp_comp_init(dev, dev->of_node, &dsi->ddp_comp, comp_id, &mtk_dsi_funcs); if (ret) { dev_err(dev, "Failed to initialize component: %d\n", ret); - return ret; + goto err_unregister_host; } irq_num = platform_get_irq(pdev, 0); if (irq_num < 0) { - dev_err(&pdev->dev, "failed to 
request dsi irq resource\n"); - return -EPROBE_DEFER; + dev_err(&pdev->dev, "failed to get dsi irq_num: %d\n", irq_num); + ret = irq_num; + goto err_unregister_host; } irq_set_status_flags(irq_num, IRQ_TYPE_LEVEL_LOW); @@ -1168,14 +1244,24 @@ static int mtk_dsi_probe(struct platform_device *pdev) IRQF_TRIGGER_LOW, dev_name(&pdev->dev), dsi); if (ret) { dev_err(&pdev->dev, "failed to request mediatek dsi irq\n"); - return -EPROBE_DEFER; + goto err_unregister_host; } init_waitqueue_head(&dsi->irq_wait_queue); platform_set_drvdata(pdev, dsi); - return component_add(&pdev->dev, &mtk_dsi_component_ops); + ret = component_add(&pdev->dev, &mtk_dsi_component_ops); + if (ret) { + dev_err(&pdev->dev, "failed to add component: %d\n", ret); + goto err_unregister_host; + } + + return 0; + +err_unregister_host: + mipi_dsi_host_unregister(&dsi->host); + return ret; } static int mtk_dsi_remove(struct platform_device *pdev) @@ -1184,13 +1270,32 @@ static int mtk_dsi_remove(struct platform_device *pdev) mtk_output_dsi_disable(dsi); component_del(&pdev->dev, &mtk_dsi_component_ops); + mipi_dsi_host_unregister(&dsi->host); return 0; } +static const struct mtk_dsi_driver_data mt8173_dsi_driver_data = { + .reg_cmdq_off = 0x200, +}; + +static const struct mtk_dsi_driver_data mt2701_dsi_driver_data = { + .reg_cmdq_off = 0x180, +}; + +static const struct mtk_dsi_driver_data mt8183_dsi_driver_data = { + .reg_cmdq_off = 0x200, + .has_shadow_ctl = true, + .has_size_ctl = true, +}; + static const struct of_device_id mtk_dsi_of_match[] = { - { .compatible = "mediatek,mt2701-dsi" }, - { .compatible = "mediatek,mt8173-dsi" }, + { .compatible = "mediatek,mt2701-dsi", + .data = &mt2701_dsi_driver_data }, + { .compatible = "mediatek,mt8173-dsi", + .data = &mt8173_dsi_driver_data }, + { .compatible = "mediatek,mt8183-dsi", + .data = &mt8183_dsi_driver_data }, { }, }; diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c index 1842dc2caae9..e4d34484ecc8 100644 --- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c +++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c @@ -3,292 +3,39 @@ * Copyright (c) 2015 MediaTek Inc. 
*/ -#include <linux/clk.h> -#include <linux/clk-provider.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/module.h> -#include <linux/of_device.h> -#include <linux/platform_device.h> -#include <linux/phy/phy.h> - -#define MIPITX_DSI_CON 0x00 -#define RG_DSI_LDOCORE_EN BIT(0) -#define RG_DSI_CKG_LDOOUT_EN BIT(1) -#define RG_DSI_BCLK_SEL (3 << 2) -#define RG_DSI_LD_IDX_SEL (7 << 4) -#define RG_DSI_PHYCLK_SEL (2 << 8) -#define RG_DSI_DSICLK_FREQ_SEL BIT(10) -#define RG_DSI_LPTX_CLMP_EN BIT(11) - -#define MIPITX_DSI_CLOCK_LANE 0x04 -#define MIPITX_DSI_DATA_LANE0 0x08 -#define MIPITX_DSI_DATA_LANE1 0x0c -#define MIPITX_DSI_DATA_LANE2 0x10 -#define MIPITX_DSI_DATA_LANE3 0x14 -#define RG_DSI_LNTx_LDOOUT_EN BIT(0) -#define RG_DSI_LNTx_CKLANE_EN BIT(1) -#define RG_DSI_LNTx_LPTX_IPLUS1 BIT(2) -#define RG_DSI_LNTx_LPTX_IPLUS2 BIT(3) -#define RG_DSI_LNTx_LPTX_IMINUS BIT(4) -#define RG_DSI_LNTx_LPCD_IPLUS BIT(5) -#define RG_DSI_LNTx_LPCD_IMINUS BIT(6) -#define RG_DSI_LNTx_RT_CODE (0xf << 8) - -#define MIPITX_DSI_TOP_CON 0x40 -#define RG_DSI_LNT_INTR_EN BIT(0) -#define RG_DSI_LNT_HS_BIAS_EN BIT(1) -#define RG_DSI_LNT_IMP_CAL_EN BIT(2) -#define RG_DSI_LNT_TESTMODE_EN BIT(3) -#define RG_DSI_LNT_IMP_CAL_CODE (0xf << 4) -#define RG_DSI_LNT_AIO_SEL (7 << 8) -#define RG_DSI_PAD_TIE_LOW_EN BIT(11) -#define RG_DSI_DEBUG_INPUT_EN BIT(12) -#define RG_DSI_PRESERVE (7 << 13) - -#define MIPITX_DSI_BG_CON 0x44 -#define RG_DSI_BG_CORE_EN BIT(0) -#define RG_DSI_BG_CKEN BIT(1) -#define RG_DSI_BG_DIV (0x3 << 2) -#define RG_DSI_BG_FAST_CHARGE BIT(4) -#define RG_DSI_VOUT_MSK (0x3ffff << 5) -#define RG_DSI_V12_SEL (7 << 5) -#define RG_DSI_V10_SEL (7 << 8) -#define RG_DSI_V072_SEL (7 << 11) -#define RG_DSI_V04_SEL (7 << 14) -#define RG_DSI_V032_SEL (7 << 17) -#define RG_DSI_V02_SEL (7 << 20) -#define RG_DSI_BG_R1_TRIM (0xf << 24) -#define RG_DSI_BG_R2_TRIM (0xf << 28) - -#define MIPITX_DSI_PLL_CON0 0x50 -#define RG_DSI_MPPLL_PLL_EN BIT(0) -#define RG_DSI_MPPLL_DIV_MSK (0x1ff << 1) -#define RG_DSI_MPPLL_PREDIV (3 << 1) -#define RG_DSI_MPPLL_TXDIV0 (3 << 3) -#define RG_DSI_MPPLL_TXDIV1 (3 << 5) -#define RG_DSI_MPPLL_POSDIV (7 << 7) -#define RG_DSI_MPPLL_MONVC_EN BIT(10) -#define RG_DSI_MPPLL_MONREF_EN BIT(11) -#define RG_DSI_MPPLL_VOD_EN BIT(12) - -#define MIPITX_DSI_PLL_CON1 0x54 -#define RG_DSI_MPPLL_SDM_FRA_EN BIT(0) -#define RG_DSI_MPPLL_SDM_SSC_PH_INIT BIT(1) -#define RG_DSI_MPPLL_SDM_SSC_EN BIT(2) -#define RG_DSI_MPPLL_SDM_SSC_PRD (0xffff << 16) - -#define MIPITX_DSI_PLL_CON2 0x58 - -#define MIPITX_DSI_PLL_TOP 0x64 -#define RG_DSI_MPPLL_PRESERVE (0xff << 8) - -#define MIPITX_DSI_PLL_PWR 0x68 -#define RG_DSI_MPPLL_SDM_PWR_ON BIT(0) -#define RG_DSI_MPPLL_SDM_ISO_EN BIT(1) -#define RG_DSI_MPPLL_SDM_PWR_ACK BIT(8) - -#define MIPITX_DSI_SW_CTRL 0x80 -#define SW_CTRL_EN BIT(0) - -#define MIPITX_DSI_SW_CTRL_CON0 0x84 -#define SW_LNTC_LPTX_PRE_OE BIT(0) -#define SW_LNTC_LPTX_OE BIT(1) -#define SW_LNTC_LPTX_P BIT(2) -#define SW_LNTC_LPTX_N BIT(3) -#define SW_LNTC_HSTX_PRE_OE BIT(4) -#define SW_LNTC_HSTX_OE BIT(5) -#define SW_LNTC_HSTX_ZEROCLK BIT(6) -#define SW_LNT0_LPTX_PRE_OE BIT(7) -#define SW_LNT0_LPTX_OE BIT(8) -#define SW_LNT0_LPTX_P BIT(9) -#define SW_LNT0_LPTX_N BIT(10) -#define SW_LNT0_HSTX_PRE_OE BIT(11) -#define SW_LNT0_HSTX_OE BIT(12) -#define SW_LNT0_LPRX_EN BIT(13) -#define SW_LNT1_LPTX_PRE_OE BIT(14) -#define SW_LNT1_LPTX_OE BIT(15) -#define SW_LNT1_LPTX_P BIT(16) -#define SW_LNT1_LPTX_N BIT(17) -#define SW_LNT1_HSTX_PRE_OE BIT(18) -#define SW_LNT1_HSTX_OE BIT(19) -#define SW_LNT2_LPTX_PRE_OE 
BIT(20) -#define SW_LNT2_LPTX_OE BIT(21) -#define SW_LNT2_LPTX_P BIT(22) -#define SW_LNT2_LPTX_N BIT(23) -#define SW_LNT2_HSTX_PRE_OE BIT(24) -#define SW_LNT2_HSTX_OE BIT(25) - -struct mtk_mipitx_data { - const u32 mppll_preserve; -}; - -struct mtk_mipi_tx { - struct device *dev; - void __iomem *regs; - u32 data_rate; - const struct mtk_mipitx_data *driver_data; - struct clk_hw pll_hw; - struct clk *pll; -}; +#include "mtk_mipi_tx.h" -static inline struct mtk_mipi_tx *mtk_mipi_tx_from_clk_hw(struct clk_hw *hw) +inline struct mtk_mipi_tx *mtk_mipi_tx_from_clk_hw(struct clk_hw *hw) { return container_of(hw, struct mtk_mipi_tx, pll_hw); } -static void mtk_mipi_tx_clear_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, - u32 bits) +void mtk_mipi_tx_clear_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, + u32 bits) { u32 temp = readl(mipi_tx->regs + offset); writel(temp & ~bits, mipi_tx->regs + offset); } -static void mtk_mipi_tx_set_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, - u32 bits) +void mtk_mipi_tx_set_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, + u32 bits) { u32 temp = readl(mipi_tx->regs + offset); writel(temp | bits, mipi_tx->regs + offset); } -static void mtk_mipi_tx_update_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, - u32 mask, u32 data) +void mtk_mipi_tx_update_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, + u32 mask, u32 data) { u32 temp = readl(mipi_tx->regs + offset); writel((temp & ~mask) | (data & mask), mipi_tx->regs + offset); } -static int mtk_mipi_tx_pll_prepare(struct clk_hw *hw) -{ - struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); - u8 txdiv, txdiv0, txdiv1; - u64 pcw; - - dev_dbg(mipi_tx->dev, "prepare: %u Hz\n", mipi_tx->data_rate); - - if (mipi_tx->data_rate >= 500000000) { - txdiv = 1; - txdiv0 = 0; - txdiv1 = 0; - } else if (mipi_tx->data_rate >= 250000000) { - txdiv = 2; - txdiv0 = 1; - txdiv1 = 0; - } else if (mipi_tx->data_rate >= 125000000) { - txdiv = 4; - txdiv0 = 2; - txdiv1 = 0; - } else if (mipi_tx->data_rate > 62000000) { - txdiv = 8; - txdiv0 = 2; - txdiv1 = 1; - } else if (mipi_tx->data_rate >= 50000000) { - txdiv = 16; - txdiv0 = 2; - txdiv1 = 2; - } else { - return -EINVAL; - } - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_BG_CON, - RG_DSI_VOUT_MSK | - RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN, - (4 << 20) | (4 << 17) | (4 << 14) | - (4 << 11) | (4 << 8) | (4 << 5) | - RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN); - - usleep_range(30, 100); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_TOP_CON, - RG_DSI_LNT_IMP_CAL_CODE | RG_DSI_LNT_HS_BIAS_EN, - (8 << 4) | RG_DSI_LNT_HS_BIAS_EN); - - mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_CON, - RG_DSI_CKG_LDOOUT_EN | RG_DSI_LDOCORE_EN); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_PWR, - RG_DSI_MPPLL_SDM_PWR_ON | - RG_DSI_MPPLL_SDM_ISO_EN, - RG_DSI_MPPLL_SDM_PWR_ON); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, - RG_DSI_MPPLL_PLL_EN); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_CON0, - RG_DSI_MPPLL_TXDIV0 | RG_DSI_MPPLL_TXDIV1 | - RG_DSI_MPPLL_PREDIV, - (txdiv0 << 3) | (txdiv1 << 5)); - - /* - * PLL PCW config - * PCW bit 24~30 = integer part of pcw - * PCW bit 0~23 = fractional part of pcw - * pcw = data_Rate*4*txdiv/(Ref_clk*2); - * Post DIV =4, so need data_Rate*4 - * Ref_clk is 26MHz - */ - pcw = div_u64(((u64)mipi_tx->data_rate * 2 * txdiv) << 24, - 26000000); - writel(pcw, mipi_tx->regs + MIPITX_DSI_PLL_CON2); - - mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_PLL_CON1, - RG_DSI_MPPLL_SDM_FRA_EN); - - mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_PLL_CON0, RG_DSI_MPPLL_PLL_EN); - - 
usleep_range(20, 100); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON1, - RG_DSI_MPPLL_SDM_SSC_EN); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP, - RG_DSI_MPPLL_PRESERVE, - mipi_tx->driver_data->mppll_preserve); - - return 0; -} - -static void mtk_mipi_tx_pll_unprepare(struct clk_hw *hw) -{ - struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); - - dev_dbg(mipi_tx->dev, "unprepare\n"); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, - RG_DSI_MPPLL_PLL_EN); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP, - RG_DSI_MPPLL_PRESERVE, 0); - - mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_PWR, - RG_DSI_MPPLL_SDM_ISO_EN | - RG_DSI_MPPLL_SDM_PWR_ON, - RG_DSI_MPPLL_SDM_ISO_EN); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_TOP_CON, - RG_DSI_LNT_HS_BIAS_EN); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_CON, - RG_DSI_CKG_LDOOUT_EN | RG_DSI_LDOCORE_EN); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_BG_CON, - RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, - RG_DSI_MPPLL_DIV_MSK); -} - -static long mtk_mipi_tx_pll_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) -{ - return clamp_val(rate, 50000000, 1250000000); -} - -static int mtk_mipi_tx_pll_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) +int mtk_mipi_tx_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); @@ -299,37 +46,14 @@ static int mtk_mipi_tx_pll_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } -static unsigned long mtk_mipi_tx_pll_recalc_rate(struct clk_hw *hw, - unsigned long parent_rate) +unsigned long mtk_mipi_tx_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) { struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); return mipi_tx->data_rate; } -static const struct clk_ops mtk_mipi_tx_pll_ops = { - .prepare = mtk_mipi_tx_pll_prepare, - .unprepare = mtk_mipi_tx_pll_unprepare, - .round_rate = mtk_mipi_tx_pll_round_rate, - .set_rate = mtk_mipi_tx_pll_set_rate, - .recalc_rate = mtk_mipi_tx_pll_recalc_rate, -}; - -static int mtk_mipi_tx_power_on_signal(struct phy *phy) -{ - struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); - u32 reg; - - for (reg = MIPITX_DSI_CLOCK_LANE; - reg <= MIPITX_DSI_DATA_LANE3; reg += 4) - mtk_mipi_tx_set_bits(mipi_tx, reg, RG_DSI_LNTx_LDOOUT_EN); - - mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_TOP_CON, - RG_DSI_PAD_TIE_LOW_EN); - - return 0; -} - static int mtk_mipi_tx_power_on(struct phy *phy) { struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); @@ -341,30 +65,16 @@ static int mtk_mipi_tx_power_on(struct phy *phy) return ret; /* Enable DSI Lane LDO outputs, disable pad tie low */ - mtk_mipi_tx_power_on_signal(phy); - + mipi_tx->driver_data->mipi_tx_enable_signal(phy); return 0; } -static void mtk_mipi_tx_power_off_signal(struct phy *phy) -{ - struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); - u32 reg; - - mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_TOP_CON, - RG_DSI_PAD_TIE_LOW_EN); - - for (reg = MIPITX_DSI_CLOCK_LANE; - reg <= MIPITX_DSI_DATA_LANE3; reg += 4) - mtk_mipi_tx_clear_bits(mipi_tx, reg, RG_DSI_LNTx_LDOOUT_EN); -} - static int mtk_mipi_tx_power_off(struct phy *phy) { struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); /* Enable pad tie low, disable DSI Lane LDO outputs */ - mtk_mipi_tx_power_off_signal(phy); + mipi_tx->driver_data->mipi_tx_disable_signal(phy); /* Disable PLL and power down core */ clk_disable_unprepare(mipi_tx->pll); @@ 
-383,10 +93,9 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct mtk_mipi_tx *mipi_tx; struct resource *mem; - struct clk *ref_clk; const char *ref_clk_name; + struct clk *ref_clk; struct clk_init_data clk_init = { - .ops = &mtk_mipi_tx_pll_ops, .num_parents = 1, .parent_names = (const char * const *)&ref_clk_name, .flags = CLK_SET_RATE_GATE, @@ -400,6 +109,7 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) return -ENOMEM; mipi_tx->driver_data = of_device_get_match_data(dev); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); mipi_tx->regs = devm_ioremap_resource(dev, mem); if (IS_ERR(mipi_tx->regs)) { @@ -414,6 +124,7 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) dev_err(dev, "Failed to get reference clock: %d\n", ret); return ret; } + ref_clk_name = __clk_get_name(ref_clk); ret = of_property_read_string(dev->of_node, "clock-output-names", @@ -423,6 +134,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) return ret; } + clk_init.ops = mipi_tx->driver_data->mipi_tx_clk_ops; + mipi_tx->pll_hw.init = &clk_init; mipi_tx->pll = devm_clk_register(dev, &mipi_tx->pll_hw); if (IS_ERR(mipi_tx->pll)) { @@ -457,20 +170,14 @@ static int mtk_mipi_tx_remove(struct platform_device *pdev) return 0; } -static const struct mtk_mipitx_data mt2701_mipitx_data = { - .mppll_preserve = (3 << 8) -}; - -static const struct mtk_mipitx_data mt8173_mipitx_data = { - .mppll_preserve = (0 << 8) -}; - static const struct of_device_id mtk_mipi_tx_match[] = { { .compatible = "mediatek,mt2701-mipi-tx", .data = &mt2701_mipitx_data }, { .compatible = "mediatek,mt8173-mipi-tx", .data = &mt8173_mipitx_data }, - {}, + { .compatible = "mediatek,mt8183-mipi-tx", + .data = &mt8183_mipitx_data }, + { }, }; struct platform_driver mtk_mipi_tx_driver = { @@ -481,3 +188,4 @@ struct platform_driver mtk_mipi_tx_driver = { .of_match_table = mtk_mipi_tx_match, }, }; + diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.h b/drivers/gpu/drm/mediatek/mtk_mipi_tx.h new file mode 100644 index 000000000000..413f35d86219 --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2019 MediaTek Inc. 
+ * Author: Jitao Shi <jitao.shi@mediatek.com> + */ + +#ifndef _MTK_MIPI_TX_H +#define _MTK_MIPI_TX_H + +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/phy/phy.h> + +struct mtk_mipitx_data { + const u32 mppll_preserve; + const struct clk_ops *mipi_tx_clk_ops; + void (*mipi_tx_enable_signal)(struct phy *phy); + void (*mipi_tx_disable_signal)(struct phy *phy); +}; + +struct mtk_mipi_tx { + struct device *dev; + void __iomem *regs; + u32 data_rate; + const struct mtk_mipitx_data *driver_data; + struct clk_hw pll_hw; + struct clk *pll; +}; + +struct mtk_mipi_tx *mtk_mipi_tx_from_clk_hw(struct clk_hw *hw); +void mtk_mipi_tx_clear_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, u32 bits); +void mtk_mipi_tx_set_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, u32 bits); +void mtk_mipi_tx_update_bits(struct mtk_mipi_tx *mipi_tx, u32 offset, u32 mask, + u32 data); +int mtk_mipi_tx_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +unsigned long mtk_mipi_tx_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate); + +extern const struct mtk_mipitx_data mt2701_mipitx_data; +extern const struct mtk_mipitx_data mt8173_mipitx_data; +extern const struct mtk_mipitx_data mt8183_mipitx_data; + +#endif diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mt8173_mipi_tx.c new file mode 100644 index 000000000000..f18db14d8b63 --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_mipi_tx.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 MediaTek Inc. + * Author: jitao.shi <jitao.shi@mediatek.com> + */ + +#include "mtk_mipi_tx.h" + +#define MIPITX_DSI_CON 0x00 +#define RG_DSI_LDOCORE_EN BIT(0) +#define RG_DSI_CKG_LDOOUT_EN BIT(1) +#define RG_DSI_BCLK_SEL (3 << 2) +#define RG_DSI_LD_IDX_SEL (7 << 4) +#define RG_DSI_PHYCLK_SEL (2 << 8) +#define RG_DSI_DSICLK_FREQ_SEL BIT(10) +#define RG_DSI_LPTX_CLMP_EN BIT(11) + +#define MIPITX_DSI_CLOCK_LANE 0x04 +#define MIPITX_DSI_DATA_LANE0 0x08 +#define MIPITX_DSI_DATA_LANE1 0x0c +#define MIPITX_DSI_DATA_LANE2 0x10 +#define MIPITX_DSI_DATA_LANE3 0x14 +#define RG_DSI_LNTx_LDOOUT_EN BIT(0) +#define RG_DSI_LNTx_CKLANE_EN BIT(1) +#define RG_DSI_LNTx_LPTX_IPLUS1 BIT(2) +#define RG_DSI_LNTx_LPTX_IPLUS2 BIT(3) +#define RG_DSI_LNTx_LPTX_IMINUS BIT(4) +#define RG_DSI_LNTx_LPCD_IPLUS BIT(5) +#define RG_DSI_LNTx_LPCD_IMINUS BIT(6) +#define RG_DSI_LNTx_RT_CODE (0xf << 8) + +#define MIPITX_DSI_TOP_CON 0x40 +#define RG_DSI_LNT_INTR_EN BIT(0) +#define RG_DSI_LNT_HS_BIAS_EN BIT(1) +#define RG_DSI_LNT_IMP_CAL_EN BIT(2) +#define RG_DSI_LNT_TESTMODE_EN BIT(3) +#define RG_DSI_LNT_IMP_CAL_CODE (0xf << 4) +#define RG_DSI_LNT_AIO_SEL (7 << 8) +#define RG_DSI_PAD_TIE_LOW_EN BIT(11) +#define RG_DSI_DEBUG_INPUT_EN BIT(12) +#define RG_DSI_PRESERVE (7 << 13) + +#define MIPITX_DSI_BG_CON 0x44 +#define RG_DSI_BG_CORE_EN BIT(0) +#define RG_DSI_BG_CKEN BIT(1) +#define RG_DSI_BG_DIV (0x3 << 2) +#define RG_DSI_BG_FAST_CHARGE BIT(4) +#define RG_DSI_VOUT_MSK (0x3ffff << 5) +#define RG_DSI_V12_SEL (7 << 5) +#define RG_DSI_V10_SEL (7 << 8) +#define RG_DSI_V072_SEL (7 << 11) +#define RG_DSI_V04_SEL (7 << 14) +#define RG_DSI_V032_SEL (7 << 17) +#define RG_DSI_V02_SEL (7 << 20) +#define RG_DSI_BG_R1_TRIM (0xf << 24) +#define RG_DSI_BG_R2_TRIM (0xf << 28) + +#define MIPITX_DSI_PLL_CON0 0x50 +#define RG_DSI_MPPLL_PLL_EN BIT(0) +#define 
RG_DSI_MPPLL_DIV_MSK (0x1ff << 1) +#define RG_DSI_MPPLL_PREDIV (3 << 1) +#define RG_DSI_MPPLL_TXDIV0 (3 << 3) +#define RG_DSI_MPPLL_TXDIV1 (3 << 5) +#define RG_DSI_MPPLL_POSDIV (7 << 7) +#define RG_DSI_MPPLL_MONVC_EN BIT(10) +#define RG_DSI_MPPLL_MONREF_EN BIT(11) +#define RG_DSI_MPPLL_VOD_EN BIT(12) + +#define MIPITX_DSI_PLL_CON1 0x54 +#define RG_DSI_MPPLL_SDM_FRA_EN BIT(0) +#define RG_DSI_MPPLL_SDM_SSC_PH_INIT BIT(1) +#define RG_DSI_MPPLL_SDM_SSC_EN BIT(2) +#define RG_DSI_MPPLL_SDM_SSC_PRD (0xffff << 16) + +#define MIPITX_DSI_PLL_CON2 0x58 + +#define MIPITX_DSI_PLL_TOP 0x64 +#define RG_DSI_MPPLL_PRESERVE (0xff << 8) + +#define MIPITX_DSI_PLL_PWR 0x68 +#define RG_DSI_MPPLL_SDM_PWR_ON BIT(0) +#define RG_DSI_MPPLL_SDM_ISO_EN BIT(1) +#define RG_DSI_MPPLL_SDM_PWR_ACK BIT(8) + +#define MIPITX_DSI_SW_CTRL 0x80 +#define SW_CTRL_EN BIT(0) + +#define MIPITX_DSI_SW_CTRL_CON0 0x84 +#define SW_LNTC_LPTX_PRE_OE BIT(0) +#define SW_LNTC_LPTX_OE BIT(1) +#define SW_LNTC_LPTX_P BIT(2) +#define SW_LNTC_LPTX_N BIT(3) +#define SW_LNTC_HSTX_PRE_OE BIT(4) +#define SW_LNTC_HSTX_OE BIT(5) +#define SW_LNTC_HSTX_ZEROCLK BIT(6) +#define SW_LNT0_LPTX_PRE_OE BIT(7) +#define SW_LNT0_LPTX_OE BIT(8) +#define SW_LNT0_LPTX_P BIT(9) +#define SW_LNT0_LPTX_N BIT(10) +#define SW_LNT0_HSTX_PRE_OE BIT(11) +#define SW_LNT0_HSTX_OE BIT(12) +#define SW_LNT0_LPRX_EN BIT(13) +#define SW_LNT1_LPTX_PRE_OE BIT(14) +#define SW_LNT1_LPTX_OE BIT(15) +#define SW_LNT1_LPTX_P BIT(16) +#define SW_LNT1_LPTX_N BIT(17) +#define SW_LNT1_HSTX_PRE_OE BIT(18) +#define SW_LNT1_HSTX_OE BIT(19) +#define SW_LNT2_LPTX_PRE_OE BIT(20) +#define SW_LNT2_LPTX_OE BIT(21) +#define SW_LNT2_LPTX_P BIT(22) +#define SW_LNT2_LPTX_N BIT(23) +#define SW_LNT2_HSTX_PRE_OE BIT(24) +#define SW_LNT2_HSTX_OE BIT(25) + +static int mtk_mipi_tx_pll_prepare(struct clk_hw *hw) +{ + struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); + u8 txdiv, txdiv0, txdiv1; + u64 pcw; + + dev_dbg(mipi_tx->dev, "prepare: %u Hz\n", mipi_tx->data_rate); + + if (mipi_tx->data_rate >= 500000000) { + txdiv = 1; + txdiv0 = 0; + txdiv1 = 0; + } else if (mipi_tx->data_rate >= 250000000) { + txdiv = 2; + txdiv0 = 1; + txdiv1 = 0; + } else if (mipi_tx->data_rate >= 125000000) { + txdiv = 4; + txdiv0 = 2; + txdiv1 = 0; + } else if (mipi_tx->data_rate > 62000000) { + txdiv = 8; + txdiv0 = 2; + txdiv1 = 1; + } else if (mipi_tx->data_rate >= 50000000) { + txdiv = 16; + txdiv0 = 2; + txdiv1 = 2; + } else { + return -EINVAL; + } + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_BG_CON, + RG_DSI_VOUT_MSK | + RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN, + (4 << 20) | (4 << 17) | (4 << 14) | + (4 << 11) | (4 << 8) | (4 << 5) | + RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN); + + usleep_range(30, 100); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_TOP_CON, + RG_DSI_LNT_IMP_CAL_CODE | RG_DSI_LNT_HS_BIAS_EN, + (8 << 4) | RG_DSI_LNT_HS_BIAS_EN); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_CON, + RG_DSI_CKG_LDOOUT_EN | RG_DSI_LDOCORE_EN); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_PWR, + RG_DSI_MPPLL_SDM_PWR_ON | + RG_DSI_MPPLL_SDM_ISO_EN, + RG_DSI_MPPLL_SDM_PWR_ON); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, + RG_DSI_MPPLL_PLL_EN); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_CON0, + RG_DSI_MPPLL_TXDIV0 | RG_DSI_MPPLL_TXDIV1 | + RG_DSI_MPPLL_PREDIV, + (txdiv0 << 3) | (txdiv1 << 5)); + + /* + * PLL PCW config + * PCW bit 24~30 = integer part of pcw + * PCW bit 0~23 = fractional part of pcw + * pcw = data_Rate*4*txdiv/(Ref_clk*2); + * Post DIV =4, so need data_Rate*4 + * Ref_clk is 26MHz + */ + pcw = 
div_u64(((u64)mipi_tx->data_rate * 2 * txdiv) << 24, + 26000000); + writel(pcw, mipi_tx->regs + MIPITX_DSI_PLL_CON2); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_PLL_CON1, + RG_DSI_MPPLL_SDM_FRA_EN); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_PLL_CON0, RG_DSI_MPPLL_PLL_EN); + + usleep_range(20, 100); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON1, + RG_DSI_MPPLL_SDM_SSC_EN); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP, + RG_DSI_MPPLL_PRESERVE, + mipi_tx->driver_data->mppll_preserve); + + return 0; +} + +static void mtk_mipi_tx_pll_unprepare(struct clk_hw *hw) +{ + struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); + + dev_dbg(mipi_tx->dev, "unprepare\n"); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, + RG_DSI_MPPLL_PLL_EN); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP, + RG_DSI_MPPLL_PRESERVE, 0); + + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_PWR, + RG_DSI_MPPLL_SDM_ISO_EN | + RG_DSI_MPPLL_SDM_PWR_ON, + RG_DSI_MPPLL_SDM_ISO_EN); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_TOP_CON, + RG_DSI_LNT_HS_BIAS_EN); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_CON, + RG_DSI_CKG_LDOOUT_EN | RG_DSI_LDOCORE_EN); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_BG_CON, + RG_DSI_BG_CKEN | RG_DSI_BG_CORE_EN); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0, + RG_DSI_MPPLL_DIV_MSK); +} + +static long mtk_mipi_tx_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + return clamp_val(rate, 50000000, 1250000000); +} + +static const struct clk_ops mtk_mipi_tx_pll_ops = { + .prepare = mtk_mipi_tx_pll_prepare, + .unprepare = mtk_mipi_tx_pll_unprepare, + .round_rate = mtk_mipi_tx_pll_round_rate, + .set_rate = mtk_mipi_tx_pll_set_rate, + .recalc_rate = mtk_mipi_tx_pll_recalc_rate, +}; + +static void mtk_mipi_tx_power_on_signal(struct phy *phy) +{ + struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); + u32 reg; + + for (reg = MIPITX_DSI_CLOCK_LANE; + reg <= MIPITX_DSI_DATA_LANE3; reg += 4) + mtk_mipi_tx_set_bits(mipi_tx, reg, RG_DSI_LNTx_LDOOUT_EN); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_TOP_CON, + RG_DSI_PAD_TIE_LOW_EN); +} + +static void mtk_mipi_tx_power_off_signal(struct phy *phy) +{ + struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); + u32 reg; + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_TOP_CON, + RG_DSI_PAD_TIE_LOW_EN); + + for (reg = MIPITX_DSI_CLOCK_LANE; + reg <= MIPITX_DSI_DATA_LANE3; reg += 4) + mtk_mipi_tx_clear_bits(mipi_tx, reg, RG_DSI_LNTx_LDOOUT_EN); +} + +const struct mtk_mipitx_data mt2701_mipitx_data = { + .mppll_preserve = (3 << 8), + .mipi_tx_clk_ops = &mtk_mipi_tx_pll_ops, + .mipi_tx_enable_signal = mtk_mipi_tx_power_on_signal, + .mipi_tx_disable_signal = mtk_mipi_tx_power_off_signal, +}; + +const struct mtk_mipitx_data mt8173_mipitx_data = { + .mppll_preserve = (0 << 8), + .mipi_tx_clk_ops = &mtk_mipi_tx_pll_ops, + .mipi_tx_enable_signal = mtk_mipi_tx_power_on_signal, + .mipi_tx_disable_signal = mtk_mipi_tx_power_off_signal, +}; diff --git a/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c new file mode 100644 index 000000000000..91f08a351fd0 --- /dev/null +++ b/drivers/gpu/drm/mediatek/mtk_mt8183_mipi_tx.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 MediaTek Inc. 
+ * Author: jitao.shi <jitao.shi@mediatek.com> + */ + +#include "mtk_mipi_tx.h" + +#define MIPITX_LANE_CON 0x000c +#define RG_DSI_CPHY_T1DRV_EN BIT(0) +#define RG_DSI_ANA_CK_SEL BIT(1) +#define RG_DSI_PHY_CK_SEL BIT(2) +#define RG_DSI_CPHY_EN BIT(3) +#define RG_DSI_PHYCK_INV_EN BIT(4) +#define RG_DSI_PWR04_EN BIT(5) +#define RG_DSI_BG_LPF_EN BIT(6) +#define RG_DSI_BG_CORE_EN BIT(7) +#define RG_DSI_PAD_TIEL_SEL BIT(8) + +#define MIPITX_PLL_PWR 0x0028 +#define MIPITX_PLL_CON0 0x002c +#define MIPITX_PLL_CON1 0x0030 +#define MIPITX_PLL_CON2 0x0034 +#define MIPITX_PLL_CON3 0x0038 +#define MIPITX_PLL_CON4 0x003c +#define RG_DSI_PLL_IBIAS (3 << 10) + +#define MIPITX_D2_SW_CTL_EN 0x0144 +#define MIPITX_D0_SW_CTL_EN 0x0244 +#define MIPITX_CK_CKMODE_EN 0x0328 +#define DSI_CK_CKMODE_EN BIT(0) +#define MIPITX_CK_SW_CTL_EN 0x0344 +#define MIPITX_D1_SW_CTL_EN 0x0444 +#define MIPITX_D3_SW_CTL_EN 0x0544 +#define DSI_SW_CTL_EN BIT(0) +#define AD_DSI_PLL_SDM_PWR_ON BIT(0) +#define AD_DSI_PLL_SDM_ISO_EN BIT(1) + +#define RG_DSI_PLL_EN BIT(4) +#define RG_DSI_PLL_POSDIV (0x7 << 8) + +static int mtk_mipi_tx_pll_enable(struct clk_hw *hw) +{ + struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); + unsigned int txdiv, txdiv0; + u64 pcw; + + dev_dbg(mipi_tx->dev, "enable: %u bps\n", mipi_tx->data_rate); + + if (mipi_tx->data_rate >= 2000000000) { + txdiv = 1; + txdiv0 = 0; + } else if (mipi_tx->data_rate >= 1000000000) { + txdiv = 2; + txdiv0 = 1; + } else if (mipi_tx->data_rate >= 500000000) { + txdiv = 4; + txdiv0 = 2; + } else if (mipi_tx->data_rate > 250000000) { + txdiv = 8; + txdiv0 = 3; + } else if (mipi_tx->data_rate >= 125000000) { + txdiv = 16; + txdiv0 = 4; + } else { + return -EINVAL; + } + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_PLL_CON4, RG_DSI_PLL_IBIAS); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_PLL_PWR, AD_DSI_PLL_SDM_PWR_ON); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_PLL_CON1, RG_DSI_PLL_EN); + udelay(1); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_PLL_PWR, AD_DSI_PLL_SDM_ISO_EN); + pcw = div_u64(((u64)mipi_tx->data_rate * txdiv) << 24, 26000000); + writel(pcw, mipi_tx->regs + MIPITX_PLL_CON0); + mtk_mipi_tx_update_bits(mipi_tx, MIPITX_PLL_CON1, RG_DSI_PLL_POSDIV, + txdiv0 << 8); + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_PLL_CON1, RG_DSI_PLL_EN); + + return 0; +} + +static void mtk_mipi_tx_pll_disable(struct clk_hw *hw) +{ + struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw); + + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_PLL_CON1, RG_DSI_PLL_EN); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_PLL_PWR, AD_DSI_PLL_SDM_ISO_EN); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_PLL_PWR, AD_DSI_PLL_SDM_PWR_ON); +} + +static long mtk_mipi_tx_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + return clamp_val(rate, 50000000, 1600000000); +} + +static const struct clk_ops mtk_mipi_tx_pll_ops = { + .enable = mtk_mipi_tx_pll_enable, + .disable = mtk_mipi_tx_pll_disable, + .round_rate = mtk_mipi_tx_pll_round_rate, + .set_rate = mtk_mipi_tx_pll_set_rate, + .recalc_rate = mtk_mipi_tx_pll_recalc_rate, +}; + +static void mtk_mipi_tx_power_on_signal(struct phy *phy) +{ + struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); + + /* BG_LPF_EN / BG_CORE_EN */ + writel(RG_DSI_PAD_TIEL_SEL | RG_DSI_BG_CORE_EN, + mipi_tx->regs + MIPITX_LANE_CON); + usleep_range(30, 100); + writel(RG_DSI_BG_CORE_EN | RG_DSI_BG_LPF_EN, + mipi_tx->regs + MIPITX_LANE_CON); + + /* Switch OFF each Lane */ + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_D0_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_clear_bits(mipi_tx, 
MIPITX_D1_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_D2_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_D3_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_CK_SW_CTL_EN, DSI_SW_CTL_EN); + + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_CK_CKMODE_EN, DSI_CK_CKMODE_EN); +} + +static void mtk_mipi_tx_power_off_signal(struct phy *phy) +{ + struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy); + + /* Switch ON each Lane */ + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_D0_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_D1_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_D2_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_D3_SW_CTL_EN, DSI_SW_CTL_EN); + mtk_mipi_tx_set_bits(mipi_tx, MIPITX_CK_SW_CTL_EN, DSI_SW_CTL_EN); + + writel(RG_DSI_PAD_TIEL_SEL | RG_DSI_BG_CORE_EN, + mipi_tx->regs + MIPITX_LANE_CON); + writel(RG_DSI_PAD_TIEL_SEL, mipi_tx->regs + MIPITX_LANE_CON); +} + +const struct mtk_mipitx_data mt8183_mipitx_data = { + .mipi_tx_clk_ops = &mtk_mipi_tx_pll_ops, + .mipi_tx_enable_signal = mtk_mipi_tx_power_on_signal, + .mipi_tx_disable_signal = mtk_mipi_tx_power_off_signal, +}; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 663ff9f4fac9..1e7b1be25bb0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -26,6 +26,8 @@ #include "dsi_cfg.h" #include "msm_kms.h" +#define DSI_RESET_TOGGLE_DELAY_MS 20 + static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { u32 ver; @@ -986,7 +988,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host) wmb(); /* clocks need to be enabled before reset */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); } @@ -1396,7 +1398,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host) /* dsi controller can only be reset while clocks are running */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); wmb(); /* controller out of reset */ dsi_write(msm_host, REG_DSI_CTRL, data0); diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 41d495a360d8..225ec808b01a 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1083,7 +1083,7 @@ static const struct dss_features omap34xx_dss_feats = { static const struct dss_features omap3630_dss_feats = { .model = DSS_MODEL_OMAP3, - .fck_div_max = 32, + .fck_div_max = 31, .fck_freq_max = 173000000, .dss_fck_multiplier = 1, .parent_clk_name = "dpll4_ck", diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c index f984a5189fbf..7a1385e834f0 100644 --- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c +++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c @@ -219,9 +219,17 @@ static const struct of_device_id lb035q02_of_match[] = { MODULE_DEVICE_TABLE(of, lb035q02_of_match); +static const struct spi_device_id lb035q02_ids[] = { + { "lb035q02", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, lb035q02_ids); + static struct spi_driver lb035q02_driver = { .probe = lb035q02_probe, .remove = lb035q02_remove, + .id_table = lb035q02_ids, .driver = { .name = "panel-lg-lb035q02", .of_match_table = lb035q02_of_match, @@ -230,7 +238,6 @@ static struct spi_driver 
lb035q02_driver = { module_spi_driver(lb035q02_driver); -MODULE_ALIAS("spi:lgphilips,lb035q02"); MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>"); MODULE_DESCRIPTION("LG.Philips LB035Q02 LCD Panel driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c index c4bc7fa85fad..fd593532ab23 100644 --- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c +++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c @@ -229,9 +229,17 @@ static const struct of_device_id nl8048_of_match[] = { MODULE_DEVICE_TABLE(of, nl8048_of_match); +static const struct spi_device_id nl8048_ids[] = { + { "nl8048hl11", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, nl8048_ids); + static struct spi_driver nl8048_driver = { .probe = nl8048_probe, .remove = nl8048_remove, + .id_table = nl8048_ids, .driver = { .name = "panel-nec-nl8048hl11", .pm = &nl8048_pm_ops, @@ -241,7 +249,6 @@ static struct spi_driver nl8048_driver = { module_spi_driver(nl8048_driver); -MODULE_ALIAS("spi:nec,nl8048hl11"); MODULE_AUTHOR("Erik Gilling <konkers@android.com>"); MODULE_DESCRIPTION("NEC-NL8048HL11 Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c index 6f5ce5867ff9..d6387d8f88a3 100644 --- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c +++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c @@ -683,9 +683,17 @@ static const struct of_device_id acx565akm_of_match[] = { MODULE_DEVICE_TABLE(of, acx565akm_of_match); +static const struct spi_device_id acx565akm_ids[] = { + { "acx565akm", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, acx565akm_ids); + static struct spi_driver acx565akm_driver = { .probe = acx565akm_probe, .remove = acx565akm_remove, + .id_table = acx565akm_ids, .driver = { .name = "panel-sony-acx565akm", .of_match_table = acx565akm_of_match, @@ -694,7 +702,6 @@ static struct spi_driver acx565akm_driver = { module_spi_driver(acx565akm_driver); -MODULE_ALIAS("spi:sony,acx565akm"); MODULE_AUTHOR("Nokia Corporation"); MODULE_DESCRIPTION("Sony ACX565AKM LCD Panel Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c index 1d08d03f509d..c44d6a65c0aa 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c @@ -374,8 +374,7 @@ static const struct of_device_id td028ttec1_of_match[] = { MODULE_DEVICE_TABLE(of, td028ttec1_of_match); static const struct spi_device_id td028ttec1_ids[] = { - { "tpo,td028ttec1", 0}, - { "toppoly,td028ttec1", 0 }, + { "td028ttec1", 0 }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c index 416aebd23b6a..621b65feec07 100644 --- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c +++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c @@ -490,9 +490,17 @@ static const struct of_device_id td043mtea1_of_match[] = { MODULE_DEVICE_TABLE(of, td043mtea1_of_match); +static const struct spi_device_id td043mtea1_ids[] = { + { "td043mtea1", 0 }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(spi, td043mtea1_ids); + static struct spi_driver td043mtea1_driver = { .probe = td043mtea1_probe, .remove = td043mtea1_remove, + .id_table = td043mtea1_ids, .driver = { .name = "panel-tpo-td043mtea1", .pm = &td043mtea1_pm_ops, @@ -502,7 +510,6 @@ static struct spi_driver td043mtea1_driver = { module_spi_driver(td043mtea1_driver); 
-MODULE_ALIAS("spi:tpo,td043mtea1"); MODULE_AUTHOR("Gražvydas Ignotas <notasas@gmail.com>"); MODULE_DESCRIPTION("TPO TD043MTEA1 Panel Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index f67ed925c0ef..8822ec13a0d6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -208,6 +208,9 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev) pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES); pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES); pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES); + pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS); + pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE); + pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE); pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES); for (i = 0; i < 4; i++) pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i)); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 19590fff135c..888e0f384c61 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -379,19 +379,11 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { - struct drm_device *ddev = pci_get_drvdata(pdev); - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); - - /* Some adapters need to be suspended before a - * shutdown occurs in order to prevent an error - * during kexec. - */ - radeon_suspend_kms(ddev, true, true, false); } static int radeon_pmops_suspend(struct device *dev) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 9c93eb4fad8b..f266c17b907a 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -131,6 +131,35 @@ static const struct rcar_du_device_info rcar_du_r8a774a1_info = { .dpll_mask = BIT(1), }; +static const struct rcar_du_device_info rcar_du_r8a774b1_info = { + .gen = 3, + .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK + | RCAR_DU_FEATURE_VSP1_SOURCE + | RCAR_DU_FEATURE_INTERLACED + | RCAR_DU_FEATURE_TVM_SYNC, + .channels_mask = BIT(3) | BIT(1) | BIT(0), + .routes = { + /* + * R8A774B1 has one RGB output, one LVDS output and one HDMI + * output. 
+ */ + [RCAR_DU_OUTPUT_DPAD0] = { + .possible_crtcs = BIT(2), + .port = 0, + }, + [RCAR_DU_OUTPUT_HDMI0] = { + .possible_crtcs = BIT(1), + .port = 1, + }, + [RCAR_DU_OUTPUT_LVDS0] = { + .possible_crtcs = BIT(0), + .port = 2, + }, + }, + .num_lvds = 1, + .dpll_mask = BIT(1), +}; + static const struct rcar_du_device_info rcar_du_r8a774c0_info = { .gen = 3, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK @@ -416,6 +445,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7745", .data = &rzg1_du_r8a7745_info }, { .compatible = "renesas,du-r8a77470", .data = &rzg1_du_r8a77470_info }, { .compatible = "renesas,du-r8a774a1", .data = &rcar_du_r8a774a1_info }, + { .compatible = "renesas,du-r8a774b1", .data = &rcar_du_r8a774b1_info }, { .compatible = "renesas,du-r8a774c0", .data = &rcar_du_r8a774c0_info }, { .compatible = "renesas,du-r8a7779", .data = &rcar_du_r8a7779_info }, { .compatible = "renesas,du-r8a7790", .data = &rcar_du_r8a7790_info }, diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 2dc9caee8767..0d59f390de19 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -585,7 +585,11 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) vsps[j].crtcs_mask |= BIT(i); - /* Store the VSP pointer and pipe index in the CRTC. */ + /* + * Store the VSP pointer and pipe index in the CRTC. If the + * second cell of the 'vsps' specifier isn't present, default + * to 0 to remain compatible with older DT bindings. + */ rcdu->crtcs[i].vsp = &rcdu->vsps[j]; rcdu->crtcs[i].vsp_pipe = cells >= 1 ? args.args[0] : 0; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c index ae07290bba6a..04efa78d70b6 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c @@ -147,7 +147,7 @@ static int rcar_du_wb_enc_atomic_check(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct drm_framebuffer *fb; - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; fb = conn_state->writeback_job->fb; @@ -221,7 +221,7 @@ void rcar_du_writeback_setup(struct rcar_du_crtc *rcrtc, unsigned int i; state = rcrtc->writeback.base.state; - if (!state || !state->writeback_job || !state->writeback_job->fb) + if (!state || !state->writeback_job) return; fb = state->writeback_job->fb; diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c index 3fc7e6899cab..8c6c172bbf2e 100644 --- a/drivers/gpu/drm/rcar-du/rcar_lvds.c +++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c @@ -16,6 +16,7 @@ #include <linux/of_graph.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/sys_soc.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> @@ -842,8 +843,23 @@ static int rcar_lvds_get_clocks(struct rcar_lvds *lvds) return 0; } +static const struct rcar_lvds_device_info rcar_lvds_r8a7790es1_info = { + .gen = 2, + .quirks = RCAR_LVDS_QUIRK_LANES, + .pll_setup = rcar_lvds_pll_setup_gen2, +}; + +static const struct soc_device_attribute lvds_quirk_matches[] = { + { + .soc_id = "r8a7790", .revision = "ES1.*", + .data = &rcar_lvds_r8a7790es1_info, + }, + { /* sentinel */ } +}; + static int rcar_lvds_probe(struct platform_device *pdev) { + const struct soc_device_attribute *attr; struct rcar_lvds *lvds; struct resource *mem; int ret; @@ -857,6 +873,10 @@ static int rcar_lvds_probe(struct platform_device 
*pdev) lvds->dev = &pdev->dev; lvds->info = of_device_get_match_data(&pdev->dev); + attr = soc_device_match(lvds_quirk_matches); + if (attr) + lvds->info = attr->data; + ret = rcar_lvds_parse_dt(lvds); if (ret < 0) return ret; @@ -893,12 +913,6 @@ static const struct rcar_lvds_device_info rcar_lvds_gen2_info = { .pll_setup = rcar_lvds_pll_setup_gen2, }; -static const struct rcar_lvds_device_info rcar_lvds_r8a7790_info = { - .gen = 2, - .quirks = RCAR_LVDS_QUIRK_LANES, - .pll_setup = rcar_lvds_pll_setup_gen2, -}; - static const struct rcar_lvds_device_info rcar_lvds_gen3_info = { .gen = 3, .quirks = RCAR_LVDS_QUIRK_PWD, @@ -929,8 +943,9 @@ static const struct of_device_id rcar_lvds_of_table[] = { { .compatible = "renesas,r8a7743-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a7744-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a774a1-lvds", .data = &rcar_lvds_gen3_info }, + { .compatible = "renesas,r8a774b1-lvds", .data = &rcar_lvds_gen3_info }, { .compatible = "renesas,r8a774c0-lvds", .data = &rcar_lvds_r8a77990_info }, - { .compatible = "renesas,r8a7790-lvds", .data = &rcar_lvds_r8a7790_info }, + { .compatible = "renesas,r8a7790-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a7791-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a7793-lvds", .data = &rcar_lvds_gen2_info }, { .compatible = "renesas,r8a7795-lvds", .data = &rcar_lvds_gen3_info }, diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 525dc1c0f1c1..530edb3b51cc 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -7,6 +7,7 @@ #include <linux/gpio.h> #include <linux/mod_devicetable.h> #include <linux/of_gpio.h> +#include <linux/pinctrl/consumer.h> #include <linux/platform_device.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7f86499165f..f00b2e79882f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -185,8 +185,9 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo, list_add_tail(&bo->lru, &man->lru[bo->priority]); kref_get(&bo->list_kref); - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) { + if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm && + !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | + TTM_PAGE_FLAG_SWAPPED))) { list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]); kref_get(&bo->list_kref); } @@ -878,11 +879,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev, if (!bo) { if (busy_bo) - ttm_bo_get(busy_bo); + kref_get(&busy_bo->list_kref); spin_unlock(&glob->lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) - ttm_bo_put(busy_bo); + kref_put(&busy_bo->list_kref, ttm_bo_release_list); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 1a9db691f954..79f01c5ff65e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -278,15 +278,13 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) else ret = vmf_insert_pfn(&cvma, address, pfn); - /* - * Somebody beat us to this PTE or prefaulting to - * an already populated PTE, or prefaulting error. 
- */ - - if (unlikely((ret == VM_FAULT_NOPAGE && i > 0))) - break; - else if (unlikely(ret & VM_FAULT_ERROR)) - goto out_io_unlock; + /* Never error on prefaulted PTEs */ + if (unlikely((ret & VM_FAULT_ERROR))) { + if (i == 0) + goto out_io_unlock; + else + break; + } address += PAGE_SIZE; if (unlikely(++page_offset >= page_last)) diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 1ce4d7142b6e..bf720206727f 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -231,7 +231,7 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, int i; conn_state = drm_atomic_get_new_connector_state(state, conn); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + if (!conn_state->writeback_job) return 0; crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); @@ -271,8 +271,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, u32 ctrl; int i; - if (WARN_ON(!conn_state->writeback_job || - !conn_state->writeback_job->fb)) + if (WARN_ON(!conn_state->writeback_job)) return; mode = &conn_state->crtc->state->adjusted_mode; diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index ba1828acd8c9..4be49c1aef51 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -718,17 +718,9 @@ static int xen_drv_probe(struct xenbus_device *xb_dev, struct device *dev = &xb_dev->dev; int ret; - /* - * The device is not spawn from a device tree, so arch_setup_dma_ops - * is not called, thus leaving the device with dummy DMA ops. - * This makes the device return error on PRIME buffer import, which - * is not correct: to fix this call of_dma_configure() with a NULL - * node to set default DMA ops. 
- */ - dev->coherent_dma_mask = DMA_BIT_MASK(32); - ret = of_dma_configure(dev, NULL, true); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret < 0) { - DRM_ERROR("Cannot setup DMA ops, ret %d", ret); + DRM_ERROR("Cannot setup DMA mask, ret %d", ret); return ret; } diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index cc5b09b87ab0..79a28fc91521 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device, static void mousevsc_on_channel_callback(void *context) { - const int packet_size = 0x100; - int ret; struct hv_device *device = context; - u32 bytes_recvd; - u64 req_id; struct vmpacket_descriptor *desc; - unsigned char *buffer; - int bufferlen = packet_size; - - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - do { - ret = vmbus_recvpacket_raw(device->channel, buffer, - bufferlen, &bytes_recvd, &req_id); - - switch (ret) { - case 0: - if (bytes_recvd <= 0) { - kfree(buffer); - return; - } - desc = (struct vmpacket_descriptor *)buffer; - - switch (desc->type) { - case VM_PKT_COMP: - break; - - case VM_PKT_DATA_INBAND: - mousevsc_on_receive(device, desc); - break; - - default: - pr_err("unhandled packet type %d, tid %llx len %d\n", - desc->type, req_id, bytes_recvd); - break; - } + foreach_vmbus_pkt(desc, device->channel) { + switch (desc->type) { + case VM_PKT_COMP: break; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - - if (!buffer) - return; + case VM_PKT_DATA_INBAND: + mousevsc_on_receive(device, desc); + break; + default: + pr_err("Unhandled packet type %d, tid %llx len %d\n", + desc->type, desc->trans_id, desc->len8 * 8); break; } - } while (1); - + } } static int mousevsc_connect_to_vsp(struct hv_device *device) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 391f0b225c9a..53a60c81e220 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -912,6 +912,7 @@ static void vmbus_shutdown(struct device *child_device) drv->shutdown(dev); } +#ifdef CONFIG_PM_SLEEP /* * vmbus_suspend - Suspend a vmbus device */ @@ -949,6 +950,7 @@ static int vmbus_resume(struct device *child_device) return drv->resume(dev); } +#endif /* CONFIG_PM_SLEEP */ /* * vmbus_device_release - Final callback release of the vmbus child device @@ -1070,6 +1072,7 @@ msg_handled: vmbus_signal_eom(msg, message_type); } +#ifdef CONFIG_PM_SLEEP /* * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for * hibernation, because hv_sock connections can not persist across hibernation. 
@@ -1105,6 +1108,7 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) vmbus_connection.work_queue, &ctx->work); } +#endif /* CONFIG_PM_SLEEP */ /* * Direct callback for channels using other deferred processing @@ -2125,6 +2129,7 @@ acpi_walk_err: return ret_val; } +#ifdef CONFIG_PM_SLEEP static int vmbus_bus_suspend(struct device *dev) { struct vmbus_channel *channel, *sc; @@ -2247,6 +2252,7 @@ static int vmbus_bus_resume(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c index 95b447cfa24c..b26419dbe840 100644 --- a/drivers/hwmon/nct7904.c +++ b/drivers/hwmon/nct7904.c @@ -99,6 +99,8 @@ struct nct7904_data { u8 enable_dts; u8 has_dts; u8 temp_mode; /* 0: TR mode, 1: TD mode */ + u8 fan_alarm[2]; + u8 vsen_alarm[3]; }; /* Access functions */ @@ -214,7 +216,15 @@ static int nct7904_read_fan(struct device *dev, u32 attr, int channel, SMI_STS5_REG + (channel >> 3)); if (ret < 0) return ret; - *val = (ret >> (channel & 0x07)) & 1; + if (!data->fan_alarm[channel >> 3]) + data->fan_alarm[channel >> 3] = ret & 0xff; + else + /* If there is new alarm showing up */ + data->fan_alarm[channel >> 3] |= (ret & 0xff); + *val = (data->fan_alarm[channel >> 3] >> (channel & 0x07)) & 1; + /* Needs to clean the alarm if alarm existing */ + if (*val) + data->fan_alarm[channel >> 3] ^= 1 << (channel & 0x07); return 0; default: return -EOPNOTSUPP; @@ -298,7 +308,15 @@ static int nct7904_read_in(struct device *dev, u32 attr, int channel, SMI_STS1_REG + (index >> 3)); if (ret < 0) return ret; - *val = (ret >> (index & 0x07)) & 1; + if (!data->vsen_alarm[index >> 3]) + data->vsen_alarm[index >> 3] = ret & 0xff; + else + /* If there is new alarm showing up */ + data->vsen_alarm[index >> 3] |= (ret & 0xff); + *val = (data->vsen_alarm[index >> 3] >> (index & 0x07)) & 1; + /* Needs to clean the alarm if alarm existing */ + if (*val) + data->vsen_alarm[index >> 3] ^= 1 << (index & 0x07); return 0; default: return -EOPNOTSUPP; @@ -915,12 +933,15 @@ static int nct7904_probe(struct i2c_client *client, data->temp_mode = 0; for (i = 0; i < 4; i++) { - val = (ret & (0x03 << i)) >> (i * 2); + val = (ret >> (i * 2)) & 0x03; bit = (1 << i); - if (val == 0) + if (val == 0) { data->tcpu_mask &= ~bit; - else if (val == 0x1 || val == 0x2) - data->temp_mode |= bit; + } else { + if (val == 0x1 || val == 0x2) + data->temp_mode |= bit; + data->vsen_mask &= ~(0x06 << (i * 2)); + } } /* PECI */ diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index 055227cb3d43..67b8817995c0 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -474,12 +474,17 @@ static int adxl372_configure_fifo(struct adxl372_state *st) if (ret < 0) return ret; - fifo_samples = st->watermark & 0xFF; + /* + * watermark stores the number of sets; we need to write the FIFO + * registers with the number of samples + */ + fifo_samples = (st->watermark * st->fifo_set_size); fifo_ctl = ADXL372_FIFO_CTL_FORMAT_MODE(st->fifo_format) | ADXL372_FIFO_CTL_MODE_MODE(st->fifo_mode) | - ADXL372_FIFO_CTL_SAMPLES_MODE(st->watermark); + ADXL372_FIFO_CTL_SAMPLES_MODE(fifo_samples); - ret = regmap_write(st->regmap, ADXL372_FIFO_SAMPLES, fifo_samples); + ret = regmap_write(st->regmap, + ADXL372_FIFO_SAMPLES, fifo_samples & 0xFF); if (ret < 0) return ret; @@ -548,8 +553,7 @@ static irqreturn_t adxl372_trigger_handler(int irq, void *p) goto err; /* Each sample is 2 bytes */ 
- for (i = 0; i < fifo_entries * sizeof(u16); - i += st->fifo_set_size * sizeof(u16)) + for (i = 0; i < fifo_entries; i += st->fifo_set_size) iio_push_to_buffers(indio_dev, &st->fifo_buf[i]); } err: @@ -571,6 +575,14 @@ static int adxl372_setup(struct adxl372_state *st) return -ENODEV; } + /* + * Perform a software reset to make sure the device is in a consistent + * state after start up. + */ + ret = regmap_write(st->regmap, ADXL372_RESET, ADXL372_RESET_CODE); + if (ret < 0) + return ret; + ret = adxl372_set_op_mode(st, ADXL372_STANDBY); if (ret < 0) return ret; diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index cf6c0e3a83d3..121b4e89f038 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -117,7 +117,7 @@ #define BMC150_ACCEL_SLEEP_1_SEC 0x0F #define BMC150_ACCEL_REG_TEMP 0x08 -#define BMC150_ACCEL_TEMP_CENTER_VAL 24 +#define BMC150_ACCEL_TEMP_CENTER_VAL 23 #define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2)) #define BMC150_AUTO_SUSPEND_DELAY_MS 2000 diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c index 5a3ca5904ded..f658012baad8 100644 --- a/drivers/iio/adc/ad799x.c +++ b/drivers/iio/adc/ad799x.c @@ -810,10 +810,10 @@ static int ad799x_probe(struct i2c_client *client, ret = ad799x_write_config(st, st->chip_config->default_config); if (ret < 0) - goto error_disable_reg; + goto error_disable_vref; ret = ad799x_read_config(st); if (ret < 0) - goto error_disable_reg; + goto error_disable_vref; st->config = ret; ret = iio_triggered_buffer_setup(indio_dev, NULL, diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index adc9cf7a075d..8ea2aed6d6f5 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -7,6 +7,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include <linux/dmi.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/device.h> @@ -25,6 +26,11 @@ #define AXP288_ADC_EN_MASK 0xF0 #define AXP288_ADC_TS_ENABLE 0x01 +#define AXP288_ADC_TS_BIAS_MASK GENMASK(5, 4) +#define AXP288_ADC_TS_BIAS_20UA (0 << 4) +#define AXP288_ADC_TS_BIAS_40UA (1 << 4) +#define AXP288_ADC_TS_BIAS_60UA (2 << 4) +#define AXP288_ADC_TS_BIAS_80UA (3 << 4) #define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0) #define AXP288_ADC_TS_CURRENT_OFF (0 << 0) #define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0) @@ -177,10 +183,36 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } +/* + * We rely on the machine's firmware to correctly setup the TS pin bias current + * at boot. This lists systems with broken fw where we need to set it ourselves. + */ +static const struct dmi_system_id axp288_adc_ts_bias_override[] = { + { + /* Lenovo Ideapad 100S (11 inch) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 100S-11IBY"), + }, + .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA, + }, + {} +}; + static int axp288_adc_initialize(struct axp288_adc_info *info) { + const struct dmi_system_id *bias_override; int ret, adc_enable_val; + bias_override = dmi_first_match(axp288_adc_ts_bias_override); + if (bias_override) { + ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL, + AXP288_ADC_TS_BIAS_MASK, + (uintptr_t)bias_override->driver_data); + if (ret) + return ret; + } + /* * Determine if the TS pin is enabled and set the TS current-source * accordingly. 
diff --git a/drivers/iio/adc/hx711.c b/drivers/iio/adc/hx711.c index 88c7fe15003b..62e6c8badd22 100644 --- a/drivers/iio/adc/hx711.c +++ b/drivers/iio/adc/hx711.c @@ -100,14 +100,14 @@ struct hx711_data { static int hx711_cycle(struct hx711_data *hx711_data) { - int val; + unsigned long flags; /* * if preempted for more then 60us while PD_SCK is high: * hx711 is going in reset * ==> measuring is false */ - preempt_disable(); + local_irq_save(flags); gpiod_set_value(hx711_data->gpiod_pd_sck, 1); /* @@ -117,7 +117,6 @@ static int hx711_cycle(struct hx711_data *hx711_data) */ ndelay(hx711_data->data_ready_delay_ns); - val = gpiod_get_value(hx711_data->gpiod_dout); /* * here we are not waiting for 0.2 us as suggested by the datasheet, * because the oscilloscope showed in a test scenario @@ -125,7 +124,7 @@ static int hx711_cycle(struct hx711_data *hx711_data) * and 0.56 us for PD_SCK low on TI Sitara with 800 MHz */ gpiod_set_value(hx711_data->gpiod_pd_sck, 0); - preempt_enable(); + local_irq_restore(flags); /* * make it a square wave for addressing cases with capacitance on @@ -133,7 +132,8 @@ static int hx711_cycle(struct hx711_data *hx711_data) */ ndelay(hx711_data->data_ready_delay_ns); - return val; + /* sample as late as possible */ + return gpiod_get_value(hx711_data->gpiod_dout); } static int hx711_read(struct hx711_data *hx711_data) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 7b28d045d271..7b27306330a3 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -1219,6 +1219,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); + priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, + priv->param->regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (!irq) return -EINVAL; @@ -1228,11 +1233,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (ret) return ret; - priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, - priv->param->regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - priv->clkin = devm_clk_get(&pdev->dev, "clkin"); if (IS_ERR(priv->clkin)) { dev_err(&pdev->dev, "failed to get clkin\n"); diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 9b85fefc0a96..93a096a91f8c 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -24,33 +24,6 @@ #include "stm32-adc-core.h" -/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ -#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) - -/* STM32F4_ADC_CSR - bit fields */ -#define STM32F4_EOC3 BIT(17) -#define STM32F4_EOC2 BIT(9) -#define STM32F4_EOC1 BIT(1) - -/* STM32F4_ADC_CCR - bit fields */ -#define STM32F4_ADC_ADCPRE_SHIFT 16 -#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) - -/* STM32H7 - common registers for all ADC instances */ -#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) - -/* STM32H7_ADC_CSR - bit fields */ -#define STM32H7_EOC_SLV BIT(18) -#define STM32H7_EOC_MST BIT(2) - -/* STM32H7_ADC_CCR - bit fields */ -#define STM32H7_PRESC_SHIFT 18 -#define STM32H7_PRESC_MASK GENMASK(21, 18) -#define STM32H7_CKMODE_SHIFT 16 -#define STM32H7_CKMODE_MASK GENMASK(17, 16) - #define STM32_ADC_CORE_SLEEP_DELAY_MS 2000 /* SYSCFG registers */ @@ -71,6 +44,8 @@ * @eoc1: adc1 end of conversion flag in @csr * 
@eoc2: adc2 end of conversion flag in @csr * @eoc3: adc3 end of conversion flag in @csr + * @ier: interrupt enable register offset for each adc + * @eocie_msk: end of conversion interrupt enable mask in @ier */ struct stm32_adc_common_regs { u32 csr; @@ -78,6 +53,8 @@ struct stm32_adc_common_regs { u32 eoc1_msk; u32 eoc2_msk; u32 eoc3_msk; + u32 ier; + u32 eocie_msk; }; struct stm32_adc_priv; @@ -303,6 +280,8 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { .eoc1_msk = STM32F4_EOC1, .eoc2_msk = STM32F4_EOC2, .eoc3_msk = STM32F4_EOC3, + .ier = STM32F4_ADC_CR1, + .eocie_msk = STM32F4_EOCIE, }; /* STM32H7 common registers definitions */ @@ -311,8 +290,24 @@ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = { .ccr = STM32H7_ADC_CCR, .eoc1_msk = STM32H7_EOC_MST, .eoc2_msk = STM32H7_EOC_SLV, + .ier = STM32H7_ADC_IER, + .eocie_msk = STM32H7_EOCIE, +}; + +static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = { + 0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2, }; +static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv, + unsigned int adc) +{ + u32 ier, offset = stm32_adc_offset[adc]; + + ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier); + + return ier & priv->cfg->regs->eocie_msk; +} + /* ADC common interrupt for all instances */ static void stm32_adc_irq_handler(struct irq_desc *desc) { @@ -323,13 +318,28 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); status = readl_relaxed(priv->common.base + priv->cfg->regs->csr); - if (status & priv->cfg->regs->eoc1_msk) + /* + * End of conversion may be handled by using IRQ or DMA. There may be a + * race here when two conversions complete at the same time on several + * ADCs. EOC may be read 'set' for several ADCs, with: + * - an ADC configured to use DMA (EOC triggers the DMA request, and + * is then automatically cleared by DR read in hardware) + * - an ADC configured to use IRQs (EOCIE bit is set. The handler must + * be called in this case) + * So both EOC status bit in CSR and EOCIE control bit must be checked + * before invoking the interrupt handler (e.g. call ISR only for + * IRQ-enabled ADCs). 
+ */ + if (status & priv->cfg->regs->eoc1_msk && + stm32_adc_eoc_enabled(priv, 0)) generic_handle_irq(irq_find_mapping(priv->domain, 0)); - if (status & priv->cfg->regs->eoc2_msk) + if (status & priv->cfg->regs->eoc2_msk && + stm32_adc_eoc_enabled(priv, 1)) generic_handle_irq(irq_find_mapping(priv->domain, 1)); - if (status & priv->cfg->regs->eoc3_msk) + if (status & priv->cfg->regs->eoc3_msk && + stm32_adc_eoc_enabled(priv, 2)) generic_handle_irq(irq_find_mapping(priv->domain, 2)); chained_irq_exit(chip, desc); diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 8af507b3f32d..2579d514c2a3 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -25,8 +25,145 @@ * -------------------------------------------------------- */ #define STM32_ADC_MAX_ADCS 3 +#define STM32_ADC_OFFSET 0x100 #define STM32_ADCX_COMN_OFFSET 0x300 +/* STM32F4 - Registers for each ADC instance */ +#define STM32F4_ADC_SR 0x00 +#define STM32F4_ADC_CR1 0x04 +#define STM32F4_ADC_CR2 0x08 +#define STM32F4_ADC_SMPR1 0x0C +#define STM32F4_ADC_SMPR2 0x10 +#define STM32F4_ADC_HTR 0x24 +#define STM32F4_ADC_LTR 0x28 +#define STM32F4_ADC_SQR1 0x2C +#define STM32F4_ADC_SQR2 0x30 +#define STM32F4_ADC_SQR3 0x34 +#define STM32F4_ADC_JSQR 0x38 +#define STM32F4_ADC_JDR1 0x3C +#define STM32F4_ADC_JDR2 0x40 +#define STM32F4_ADC_JDR3 0x44 +#define STM32F4_ADC_JDR4 0x48 +#define STM32F4_ADC_DR 0x4C + +/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ +#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) + +/* STM32F4_ADC_SR - bit fields */ +#define STM32F4_STRT BIT(4) +#define STM32F4_EOC BIT(1) + +/* STM32F4_ADC_CR1 - bit fields */ +#define STM32F4_RES_SHIFT 24 +#define STM32F4_RES_MASK GENMASK(25, 24) +#define STM32F4_SCAN BIT(8) +#define STM32F4_EOCIE BIT(5) + +/* STM32F4_ADC_CR2 - bit fields */ +#define STM32F4_SWSTART BIT(30) +#define STM32F4_EXTEN_SHIFT 28 +#define STM32F4_EXTEN_MASK GENMASK(29, 28) +#define STM32F4_EXTSEL_SHIFT 24 +#define STM32F4_EXTSEL_MASK GENMASK(27, 24) +#define STM32F4_EOCS BIT(10) +#define STM32F4_DDS BIT(9) +#define STM32F4_DMA BIT(8) +#define STM32F4_ADON BIT(0) + +/* STM32F4_ADC_CSR - bit fields */ +#define STM32F4_EOC3 BIT(17) +#define STM32F4_EOC2 BIT(9) +#define STM32F4_EOC1 BIT(1) + +/* STM32F4_ADC_CCR - bit fields */ +#define STM32F4_ADC_ADCPRE_SHIFT 16 +#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) + +/* STM32H7 - Registers for each ADC instance */ +#define STM32H7_ADC_ISR 0x00 +#define STM32H7_ADC_IER 0x04 +#define STM32H7_ADC_CR 0x08 +#define STM32H7_ADC_CFGR 0x0C +#define STM32H7_ADC_SMPR1 0x14 +#define STM32H7_ADC_SMPR2 0x18 +#define STM32H7_ADC_PCSEL 0x1C +#define STM32H7_ADC_SQR1 0x30 +#define STM32H7_ADC_SQR2 0x34 +#define STM32H7_ADC_SQR3 0x38 +#define STM32H7_ADC_SQR4 0x3C +#define STM32H7_ADC_DR 0x40 +#define STM32H7_ADC_DIFSEL 0xC0 +#define STM32H7_ADC_CALFACT 0xC4 +#define STM32H7_ADC_CALFACT2 0xC8 + +/* STM32H7 - common registers for all ADC instances */ +#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) + +/* STM32H7_ADC_ISR - bit fields */ +#define STM32MP1_VREGREADY BIT(12) +#define STM32H7_EOC BIT(2) +#define STM32H7_ADRDY BIT(0) + +/* STM32H7_ADC_IER - bit fields */ +#define STM32H7_EOCIE STM32H7_EOC + +/* STM32H7_ADC_CR - bit fields */ +#define STM32H7_ADCAL BIT(31) +#define STM32H7_ADCALDIF BIT(30) +#define STM32H7_DEEPPWD BIT(29) +#define STM32H7_ADVREGEN BIT(28) +#define 
STM32H7_LINCALRDYW6 BIT(27) +#define STM32H7_LINCALRDYW5 BIT(26) +#define STM32H7_LINCALRDYW4 BIT(25) +#define STM32H7_LINCALRDYW3 BIT(24) +#define STM32H7_LINCALRDYW2 BIT(23) +#define STM32H7_LINCALRDYW1 BIT(22) +#define STM32H7_ADCALLIN BIT(16) +#define STM32H7_BOOST BIT(8) +#define STM32H7_ADSTP BIT(4) +#define STM32H7_ADSTART BIT(2) +#define STM32H7_ADDIS BIT(1) +#define STM32H7_ADEN BIT(0) + +/* STM32H7_ADC_CFGR bit fields */ +#define STM32H7_EXTEN_SHIFT 10 +#define STM32H7_EXTEN_MASK GENMASK(11, 10) +#define STM32H7_EXTSEL_SHIFT 5 +#define STM32H7_EXTSEL_MASK GENMASK(9, 5) +#define STM32H7_RES_SHIFT 2 +#define STM32H7_RES_MASK GENMASK(4, 2) +#define STM32H7_DMNGT_SHIFT 0 +#define STM32H7_DMNGT_MASK GENMASK(1, 0) + +enum stm32h7_adc_dmngt { + STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ + STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ + STM32H7_DMNGT_DFSDM, /* DFSDM mode */ + STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ +}; + +/* STM32H7_ADC_CALFACT - bit fields */ +#define STM32H7_CALFACT_D_SHIFT 16 +#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) +#define STM32H7_CALFACT_S_SHIFT 0 +#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) + +/* STM32H7_ADC_CALFACT2 - bit fields */ +#define STM32H7_LINCALFACT_SHIFT 0 +#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) + +/* STM32H7_ADC_CSR - bit fields */ +#define STM32H7_EOC_SLV BIT(18) +#define STM32H7_EOC_MST BIT(2) + +/* STM32H7_ADC_CCR - bit fields */ +#define STM32H7_PRESC_SHIFT 18 +#define STM32H7_PRESC_MASK GENMASK(21, 18) +#define STM32H7_CKMODE_SHIFT 16 +#define STM32H7_CKMODE_MASK GENMASK(17, 16) + /** * struct stm32_adc_common - stm32 ADC driver common data (for all instances) * @base: control registers base cpu addr diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 6a7dd08b1e0b..663f8a5012d6 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -28,115 +28,6 @@ #include "stm32-adc-core.h" -/* STM32F4 - Registers for each ADC instance */ -#define STM32F4_ADC_SR 0x00 -#define STM32F4_ADC_CR1 0x04 -#define STM32F4_ADC_CR2 0x08 -#define STM32F4_ADC_SMPR1 0x0C -#define STM32F4_ADC_SMPR2 0x10 -#define STM32F4_ADC_HTR 0x24 -#define STM32F4_ADC_LTR 0x28 -#define STM32F4_ADC_SQR1 0x2C -#define STM32F4_ADC_SQR2 0x30 -#define STM32F4_ADC_SQR3 0x34 -#define STM32F4_ADC_JSQR 0x38 -#define STM32F4_ADC_JDR1 0x3C -#define STM32F4_ADC_JDR2 0x40 -#define STM32F4_ADC_JDR3 0x44 -#define STM32F4_ADC_JDR4 0x48 -#define STM32F4_ADC_DR 0x4C - -/* STM32F4_ADC_SR - bit fields */ -#define STM32F4_STRT BIT(4) -#define STM32F4_EOC BIT(1) - -/* STM32F4_ADC_CR1 - bit fields */ -#define STM32F4_RES_SHIFT 24 -#define STM32F4_RES_MASK GENMASK(25, 24) -#define STM32F4_SCAN BIT(8) -#define STM32F4_EOCIE BIT(5) - -/* STM32F4_ADC_CR2 - bit fields */ -#define STM32F4_SWSTART BIT(30) -#define STM32F4_EXTEN_SHIFT 28 -#define STM32F4_EXTEN_MASK GENMASK(29, 28) -#define STM32F4_EXTSEL_SHIFT 24 -#define STM32F4_EXTSEL_MASK GENMASK(27, 24) -#define STM32F4_EOCS BIT(10) -#define STM32F4_DDS BIT(9) -#define STM32F4_DMA BIT(8) -#define STM32F4_ADON BIT(0) - -/* STM32H7 - Registers for each ADC instance */ -#define STM32H7_ADC_ISR 0x00 -#define STM32H7_ADC_IER 0x04 -#define STM32H7_ADC_CR 0x08 -#define STM32H7_ADC_CFGR 0x0C -#define STM32H7_ADC_SMPR1 0x14 -#define STM32H7_ADC_SMPR2 0x18 -#define STM32H7_ADC_PCSEL 0x1C -#define STM32H7_ADC_SQR1 0x30 -#define STM32H7_ADC_SQR2 0x34 -#define STM32H7_ADC_SQR3 0x38 -#define STM32H7_ADC_SQR4 0x3C -#define STM32H7_ADC_DR 0x40 -#define STM32H7_ADC_DIFSEL 0xC0 -#define 
STM32H7_ADC_CALFACT 0xC4 -#define STM32H7_ADC_CALFACT2 0xC8 - -/* STM32H7_ADC_ISR - bit fields */ -#define STM32MP1_VREGREADY BIT(12) -#define STM32H7_EOC BIT(2) -#define STM32H7_ADRDY BIT(0) - -/* STM32H7_ADC_IER - bit fields */ -#define STM32H7_EOCIE STM32H7_EOC - -/* STM32H7_ADC_CR - bit fields */ -#define STM32H7_ADCAL BIT(31) -#define STM32H7_ADCALDIF BIT(30) -#define STM32H7_DEEPPWD BIT(29) -#define STM32H7_ADVREGEN BIT(28) -#define STM32H7_LINCALRDYW6 BIT(27) -#define STM32H7_LINCALRDYW5 BIT(26) -#define STM32H7_LINCALRDYW4 BIT(25) -#define STM32H7_LINCALRDYW3 BIT(24) -#define STM32H7_LINCALRDYW2 BIT(23) -#define STM32H7_LINCALRDYW1 BIT(22) -#define STM32H7_ADCALLIN BIT(16) -#define STM32H7_BOOST BIT(8) -#define STM32H7_ADSTP BIT(4) -#define STM32H7_ADSTART BIT(2) -#define STM32H7_ADDIS BIT(1) -#define STM32H7_ADEN BIT(0) - -/* STM32H7_ADC_CFGR bit fields */ -#define STM32H7_EXTEN_SHIFT 10 -#define STM32H7_EXTEN_MASK GENMASK(11, 10) -#define STM32H7_EXTSEL_SHIFT 5 -#define STM32H7_EXTSEL_MASK GENMASK(9, 5) -#define STM32H7_RES_SHIFT 2 -#define STM32H7_RES_MASK GENMASK(4, 2) -#define STM32H7_DMNGT_SHIFT 0 -#define STM32H7_DMNGT_MASK GENMASK(1, 0) - -enum stm32h7_adc_dmngt { - STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ - STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ - STM32H7_DMNGT_DFSDM, /* DFSDM mode */ - STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ -}; - -/* STM32H7_ADC_CALFACT - bit fields */ -#define STM32H7_CALFACT_D_SHIFT 16 -#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) -#define STM32H7_CALFACT_S_SHIFT 0 -#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) - -/* STM32H7_ADC_CALFACT2 - bit fields */ -#define STM32H7_LINCALFACT_SHIFT 0 -#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) - /* Number of linear calibration shadow registers / LINCALRDYW control bits */ #define STM32H7_LINCALFACT_NUM 6 diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c index 9ac8356d9a95..4998a89d083d 100644 --- a/drivers/iio/imu/adis_buffer.c +++ b/drivers/iio/imu/adis_buffer.c @@ -35,8 +35,11 @@ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev, return -ENOMEM; adis->buffer = kzalloc(burst_length + sizeof(u16), GFP_KERNEL); - if (!adis->buffer) + if (!adis->buffer) { + kfree(adis->xfer); + adis->xfer = NULL; return -ENOMEM; + } tx = adis->buffer + burst_length; tx[0] = ADIS_READ_REG(adis->burst->reg_cmd); @@ -78,8 +81,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev, return -ENOMEM; adis->buffer = kcalloc(indio_dev->scan_bytes, 2, GFP_KERNEL); - if (!adis->buffer) + if (!adis->buffer) { + kfree(adis->xfer); + adis->xfer = NULL; return -ENOMEM; + } rx = adis->buffer; tx = rx + scan_count; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h index 80e42c7dbcbe..0fe6999b8257 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h @@ -99,7 +99,9 @@ struct st_lsm6dsx_fs { #define ST_LSM6DSX_FS_LIST_SIZE 4 struct st_lsm6dsx_fs_table_entry { struct st_lsm6dsx_reg reg; + struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE]; + int fs_len; }; /** diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 2d3495560136..fd5ebe1e1594 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -145,6 +145,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, 
.fs_avl[3] = { IIO_G_TO_M_S_2(732), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -154,6 +155,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[0] = { IIO_DEGREE_TO_RAD(245), 0x0 }, .fs_avl[1] = { IIO_DEGREE_TO_RAD(500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(2000), 0x3 }, + .fs_len = 3, }, }, }, @@ -215,6 +217,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -225,6 +228,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -327,6 +331,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -337,6 +342,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -448,6 +454,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -458,6 +465,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .decimator = { @@ -563,6 +571,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -573,6 +582,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -693,6 +703,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -703,6 +714,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -800,6 +812,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 }, .fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 }, .fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 }, + .fs_len = 4, }, [ST_LSM6DSX_ID_GYRO] = { .reg = { @@ -810,6 +823,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = { .fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 }, .fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 }, .fs_avl[3] = 
{ IIO_DEGREE_TO_RAD(70000), 0x3 }, + .fs_len = 4, }, }, .batch = { @@ -933,11 +947,12 @@ static int st_lsm6dsx_set_full_scale(struct st_lsm6dsx_sensor *sensor, int i, err; fs_table = &sensor->hw->settings->fs_table[sensor->id]; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) + for (i = 0; i < fs_table->fs_len; i++) { if (fs_table->fs_avl[i].gain == gain) break; + } - if (i == ST_LSM6DSX_FS_LIST_SIZE) + if (i == fs_table->fs_len) return -EINVAL; data = ST_LSM6DSX_SHIFT_VAL(fs_table->fs_avl[i].val, @@ -1196,18 +1211,13 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev, { struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev)); const struct st_lsm6dsx_fs_table_entry *fs_table; - enum st_lsm6dsx_sensor_id id = sensor->id; struct st_lsm6dsx_hw *hw = sensor->hw; int i, len = 0; - fs_table = &hw->settings->fs_table[id]; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) { - if (!fs_table->fs_avl[i].gain) - break; - + fs_table = &hw->settings->fs_table[sensor->id]; + for (i = 0; i < fs_table->fs_len; i++) len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", fs_table->fs_avl[i].gain); - } buf[len - 1] = '\n'; return len; diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c index 66fbcd94642d..ea472cf6db7b 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c @@ -61,6 +61,7 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = { .gain = 1500, .val = 0x0, }, /* 1500 uG/LSB */ + .fs_len = 1, }, .temp_comp = { .addr = 0x60, @@ -92,9 +93,11 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = { static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw) { struct st_lsm6dsx_sensor *sensor; + u16 odr; sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]); - msleep((2000U / sensor->odr) + 1); + odr = (hw->enable_mask & BIT(ST_LSM6DSX_ID_ACC)) ? sensor->odr : 13; + msleep((2000U / odr) + 1); } /** @@ -555,13 +558,9 @@ static ssize_t st_lsm6dsx_shub_scale_avail(struct device *dev, int i, len = 0; settings = sensor->ext_info.settings; - for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) { - u16 val = settings->fs_table.fs_avl[i].gain; - - if (val > 0) - len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", - val); - } + for (i = 0; i < settings->fs_table.fs_len; i++) + len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ", + settings->fs_table.fs_avl[i].gain); buf[len - 1] = '\n'; return len; diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 08d7e1ef2186..4a1a883dc061 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -314,6 +314,7 @@ config MAX44009 config NOA1305 tristate "ON Semiconductor NOA1305 ambient light sensor" depends on I2C + select REGMAP_I2C help Say Y here if you want to build support for the ON Semiconductor NOA1305 ambient light sensor. 
diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index e666879007d2..92004a2563ea 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -686,6 +686,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) struct iio_dev *iio = _iio; struct opt3001 *opt = iio_priv(iio); int ret; + bool wake_result_ready_queue = false; if (!opt->ok_to_ignore_lock) mutex_lock(&opt->lock); @@ -720,13 +721,16 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) } opt->result = ret; opt->result_ready = true; - wake_up(&opt->result_ready_queue); + wake_result_ready_queue = true; } out: if (!opt->ok_to_ignore_lock) mutex_unlock(&opt->lock); + if (wake_result_ready_queue) + wake_up(&opt->result_ready_queue); + return IRQ_HANDLED; } diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index 51421ac32517..16dacea9eadf 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -398,19 +398,23 @@ static int vcnl4000_probe(struct i2c_client *client, static const struct of_device_id vcnl_4000_of_match[] = { { .compatible = "vishay,vcnl4000", - .data = "VCNL4000", + .data = (void *)VCNL4000, }, { .compatible = "vishay,vcnl4010", - .data = "VCNL4010", + .data = (void *)VCNL4010, }, { - .compatible = "vishay,vcnl4010", - .data = "VCNL4020", + .compatible = "vishay,vcnl4020", + .data = (void *)VCNL4010, + }, + { + .compatible = "vishay,vcnl4040", + .data = (void *)VCNL4040, }, { .compatible = "vishay,vcnl4200", - .data = "VCNL4200", + .data = (void *)VCNL4200, }, {}, }; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index da10e6ccb43c..5920c0085d35 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4399,6 +4399,7 @@ error2: error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; + kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4407,6 +4408,7 @@ error1: ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); + kfree(port); } free: kfree(cm_dev); @@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); + kfree(port); } kfree(cm_dev); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0e3cf3461999..d78f67623f24 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..2dd2cfe9b561 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device, if (!netdev) return -ENODEV; - dev_put(netdev); - port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu); @@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device, port_attr->state = IB_PORT_DOWN; port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else { - inetdev = in_dev_get(netdev); + rcu_read_lock(); + inetdev = __in_dev_get_rcu(netdev); if 
(inetdev && inetdev->ifa_list) { port_attr->state = IB_PORT_ACTIVE; port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; - in_dev_put(inetdev); } else { port_attr->state = IB_PORT_INIT; port_attr->phys_state = IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } + + rcu_read_unlock(); } + dev_put(netdev); err = device->ops.query_port(device, port_num, port_attr); if (err) return err; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7a7474000100..65b36548bc17 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto err; + goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, @@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); - ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); - if (ret) - goto err_unbind; - if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || @@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_fill; } + ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); + if (ret) + goto err_fill; + nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: - rdma_counter_bind_qpn(device, port, qpn, cntn); -err_unbind: nlmsg_free(msg); err: ib_device_put(device); diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 1ab423b19f77..6eb6d2717ca5 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev) int ret; rdma_for_each_port (dev, i) { - is_ib = rdma_protocol_ib(dev, i++); + is_ib = rdma_protocol_ib(dev, i); if (is_ib) break; } diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f67a30fda1ed..163ff7ba92b7 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) * that the hardware will not attempt to access the MR any more. */ if (!umem_odp->is_implicit_odp) { + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); + mutex_unlock(&umem_odp->umem_mutex); kvfree(umem_odp->dma_list); kvfree(umem_odp->page_list); } @@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; + lockdep_assert_held(&umem_odp->umem_mutex); + virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); /* Note that during the run of this function, the @@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. 
*/ - mutex_lock(&umem_odp->umem_mutex); for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; if (umem_odp->page_list[idx]) { @@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, umem_odp->npages--; } } - mutex_unlock(&umem_odp->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..35c284af574d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 
0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..2ed7bfd5feea 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 8056930bbe2c..cd9ee1664a69 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 59022b744144..d609f4659afb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. 
+ */ + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->mr_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); - if (err) - goto obj_destroy; - } - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto err_copy; + goto obj_destroy; if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2ceaef3ea3fb..1a98ee2e01c4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -606,7 +606,7 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; + unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -639,7 +639,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1eff031ef048..630599311586 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. 
- */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - mr->live = 0; + WRITE_ONCE(mr->live, 0); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, @@ -1987,14 +1962,25 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->mr_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 2e9b43061797..3f9478d19376 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } + /* + * The locking here is pretty subtle. Ideally the implicit children + * list would be protected by the umem_mutex, however that is not + * possible. 
Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * <change children list> + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the children list must be followed by the locked + * update_xlt before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the children list visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the children list. + */ + lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); + lockdep_assert_held(&mr->dev->mr_srcu); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); @@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work) struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + int srcu_key; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_odp_release(odp); - if (imr->live) + if (smp_load_acquire(&imr->live)) { + srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + } + ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); if (atomic_dec_and_test(&imr->num_leaf_free)) @@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. 
We can safely unmap it, and mark it as dirty if @@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); + WRITE_ONCE(mr->live, 0); + umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); } + mutex_unlock(&umem_odp->umem_mutex); } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->live = 1; - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", mr->mmkey.key, dev->mdev, mr); @@ -484,6 +513,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); + smp_store_release(&mtt->live, 1); + if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; nentries++; @@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + smp_store_release(&imr->live, 1); return imr; } @@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) if (mr->parent != imr) continue; + mutex_lock(&umem_odp->umem_mutex); ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); - if (umem_odp->dying) + if (umem_odp->dying) { + mutex_unlock(&umem_odp->umem_mutex); continue; + } - WRITE_ONCE(umem_odp->dying, 1); + umem_odp->dying = 1; atomic_inc(&imr->num_leaf_free); schedule_work(&umem_odp->work); + mutex_unlock(&umem_odp->umem_mutex); } up_read(&per_mm->umem_rwsem); @@ -773,7 +809,7 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { + if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { mlx5_ib_dbg(dev, "got dead MR\n"); ret = -EFAULT; goto srcu_unlock; @@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) { + if (!smp_load_acquire(&mr->live)) { ret = false; break; } - if (!mr->live) { + if (mr->ibmr.pd != pd) { ret = false; break; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..36cdfbdbd325 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 430314c8abd9..52d402f39df9 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp) */ void siw_qp_llp_write_space(struct sock *sk) { - struct siw_cep *cep = sk_to_cep(sk); + struct siw_cep *cep; - cep->sk_write_space(sk); + read_lock(&sk->sk_callback_lock); + + cep = sk_to_cep(sk); + if (cep) { + cep->sk_write_space(sk); - if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) - (void)siw_sq_start(cep->qp); + if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + (void)siw_sq_start(cep->qp); + } + + read_unlock(&sk->sk_callback_lock); } static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) diff --git a/drivers/input/misc/da9063_onkey.c 
b/drivers/input/misc/da9063_onkey.c index dace8577fa43..79851923ee57 100644 --- a/drivers/input/misc/da9063_onkey.c +++ b/drivers/input/misc/da9063_onkey.c @@ -232,10 +232,7 @@ static int da9063_onkey_probe(struct platform_device *pdev) onkey->input->phys = onkey->phys; onkey->input->dev.parent = &pdev->dev; - if (onkey->key_power) - input_set_capability(onkey->input, EV_KEY, KEY_POWER); - - input_set_capability(onkey->input, EV_KEY, KEY_SLEEP); + input_set_capability(onkey->input, EV_KEY, KEY_POWER); INIT_DELAYED_WORK(&onkey->work, da9063_poll_on); diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 97e3639e99d0..08520b3a18b8 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -92,11 +92,18 @@ soc_button_device_create(struct platform_device *pdev, continue; gpio = soc_button_lookup_gpio(&pdev->dev, info->acpi_index); - if (gpio < 0 && gpio != -ENOENT) { - error = gpio; - goto err_free_mem; - } else if (!gpio_is_valid(gpio)) { - /* Skip GPIO if not present */ + if (!gpio_is_valid(gpio)) { + /* + * Skip GPIO if not present. Note we deliberately + * ignore -EPROBE_DEFER errors here. On some devices + * Intel is using so called virtual GPIOs which are not + * GPIOs at all but some way for AML code to check some + * random status bits without need a custom opregion. + * In some cases the resources table we parse points to + * such a virtual GPIO, since these are not real GPIOs + * we do not have a driver for these so they will never + * show up, therefore we ignore -EPROBE_DEFER. + */ continue; } diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 04fe43440a3c..2d8434b7b623 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1827,31 +1827,6 @@ static int elantech_create_smbus(struct psmouse *psmouse, leave_breadcrumbs); } -static bool elantech_use_host_notify(struct psmouse *psmouse, - struct elantech_device_info *info) -{ - if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) - return true; - - switch (info->bus) { - case ETP_BUS_PS2_ONLY: - /* expected case */ - break; - case ETP_BUS_SMB_HST_NTFY_ONLY: - case ETP_BUS_PS2_SMB_HST_NTFY: - /* SMbus implementation is stable since 2018 */ - if (dmi_get_bios_year() >= 2018) - return true; - /* fall through */ - default: - psmouse_dbg(psmouse, - "Ignoring SMBus bus provider %d\n", info->bus); - break; - } - - return false; -} - /** * elantech_setup_smbus - called once the PS/2 devices are enumerated * and decides to instantiate a SMBus InterTouch device. @@ -1871,7 +1846,7 @@ static int elantech_setup_smbus(struct psmouse *psmouse, * i2c_blacklist_pnp_ids. * Old ICs are up to the user to decide. 
*/ - if (!elantech_use_host_notify(psmouse, info) || + if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) || psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids)) return -ENXIO; } @@ -1891,6 +1866,34 @@ static int elantech_setup_smbus(struct psmouse *psmouse, return 0; } +static bool elantech_use_host_notify(struct psmouse *psmouse, + struct elantech_device_info *info) +{ + if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version)) + return true; + + switch (info->bus) { + case ETP_BUS_PS2_ONLY: + /* expected case */ + break; + case ETP_BUS_SMB_ALERT_ONLY: + /* fall-through */ + case ETP_BUS_PS2_SMB_ALERT: + psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n"); + break; + case ETP_BUS_SMB_HST_NTFY_ONLY: + /* fall-through */ + case ETP_BUS_PS2_SMB_HST_NTFY: + return true; + default: + psmouse_dbg(psmouse, + "Ignoring SMBus bus provider %d.\n", + info->bus); + } + + return false; +} + int elantech_init_smbus(struct psmouse *psmouse) { struct elantech_device_info info; diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 772493b1f665..190b9974526b 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -146,7 +146,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) } mutex_lock(&data->irq_mutex); - bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask, + bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits, data->irq_count); /* * At this point, irq_status has all bits that are set in the @@ -385,6 +385,8 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev, bitmap_copy(data->current_irq_mask, data->new_irq_mask, data->num_of_irq_regs); + bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count); + error_unlock: mutex_unlock(&data->irq_mutex); return error; @@ -398,6 +400,8 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev, struct device *dev = &rmi_dev->dev; mutex_lock(&data->irq_mutex); + bitmap_andnot(data->fn_irq_bits, + data->fn_irq_bits, mask, data->irq_count); bitmap_andnot(data->new_irq_mask, data->current_irq_mask, mask, data->irq_count); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 5178ea8b5f30..fb43aa708660 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -53,6 +53,7 @@ struct goodix_ts_data { const char *cfg_name; struct completion firmware_loading_complete; unsigned long irq_flags; + unsigned int contact_size; }; #define GOODIX_GPIO_INT_NAME "irq" @@ -62,6 +63,7 @@ struct goodix_ts_data { #define GOODIX_MAX_WIDTH 4096 #define GOODIX_INT_TRIGGER 1 #define GOODIX_CONTACT_SIZE 8 +#define GOODIX_MAX_CONTACT_SIZE 9 #define GOODIX_MAX_CONTACTS 10 #define GOODIX_CONFIG_MAX_LENGTH 240 @@ -144,6 +146,19 @@ static const struct dmi_system_id rotated_screen[] = { {} }; +static const struct dmi_system_id nine_bytes_report[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + .ident = "Lenovo YogaBook", + /* YB1-X91L/F and YB1-X90L/F */ + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9") + } + }, +#endif + {} +}; + /** * goodix_i2c_read - read data from a register of the i2c slave device. 
* @@ -249,7 +264,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT); do { error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR, - data, GOODIX_CONTACT_SIZE + 1); + data, ts->contact_size + 1); if (error) { dev_err(&ts->client->dev, "I2C transfer error: %d\n", error); @@ -262,12 +277,12 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return -EPROTO; if (touch_num > 1) { - data += 1 + GOODIX_CONTACT_SIZE; + data += 1 + ts->contact_size; error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR + - 1 + GOODIX_CONTACT_SIZE, + 1 + ts->contact_size, data, - GOODIX_CONTACT_SIZE * + ts->contact_size * (touch_num - 1)); if (error) return error; @@ -286,7 +301,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data) return 0; } -static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) +static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data) { int id = coor_data[0] & 0x0F; int input_x = get_unaligned_le16(&coor_data[1]); @@ -301,6 +316,21 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); } +static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data) +{ + int id = coor_data[1] & 0x0F; + int input_x = get_unaligned_le16(&coor_data[3]); + int input_y = get_unaligned_le16(&coor_data[5]); + int input_w = get_unaligned_le16(&coor_data[7]); + + input_mt_slot(ts->input_dev, id); + input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true); + touchscreen_report_pos(ts->input_dev, &ts->prop, + input_x, input_y, true); + input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w); + input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w); +} + /** * goodix_process_events - Process incoming events * @@ -311,7 +341,7 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data) */ static void goodix_process_events(struct goodix_ts_data *ts) { - u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; + u8 point_data[1 + GOODIX_MAX_CONTACT_SIZE * GOODIX_MAX_CONTACTS]; int touch_num; int i; @@ -326,8 +356,12 @@ static void goodix_process_events(struct goodix_ts_data *ts) input_report_key(ts->input_dev, KEY_LEFTMETA, point_data[0] & BIT(4)); for (i = 0; i < touch_num; i++) - goodix_ts_report_touch(ts, - &point_data[1 + GOODIX_CONTACT_SIZE * i]); + if (ts->contact_size == 9) + goodix_ts_report_touch_9b(ts, + &point_data[1 + ts->contact_size * i]); + else + goodix_ts_report_touch_8b(ts, + &point_data[1 + ts->contact_size * i]); input_mt_sync_frame(ts->input_dev); input_sync(ts->input_dev); @@ -730,6 +764,13 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) "Applying '180 degrees rotated screen' quirk\n"); } + if (dmi_check_system(nine_bytes_report)) { + ts->contact_size = 9; + + dev_dbg(&ts->client->dev, + "Non-standard 9-bytes report format quirk\n"); + } + error = input_mt_init_slots(ts->input_dev, ts->max_touch_num, INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED); if (error) { @@ -810,6 +851,7 @@ static int goodix_ts_probe(struct i2c_client *client, ts->client = client; i2c_set_clientdata(client, ts); init_completion(&ts->firmware_loading_complete); + ts->contact_size = GOODIX_CONTACT_SIZE; error = goodix_get_gpio_config(ts); if (error) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2369b8af81f3..dd555078258c 100644 --- 
a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -583,7 +583,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - pasid = PPR_PASID(*(u64 *)&event[0]); + pasid = (event[0] & EVENT_DOMID_MASK_HI) | + (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; @@ -616,7 +617,7 @@ retry: address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; @@ -1463,6 +1464,7 @@ static void free_pagetable(struct protection_domain *domain) * to 64 bits. */ static bool increase_address_space(struct protection_domain *domain, + unsigned long address, gfp_t gfp) { unsigned long flags; @@ -1471,8 +1473,8 @@ static bool increase_address_space(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); - if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) - /* address space already 64 bit large */ + if (address <= PM_LEVEL_SIZE(domain->mode) || + WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); @@ -1505,7 +1507,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, gfp) || *updated; + *updated = increase_address_space(domain, address, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index c9c1612d52e0..17bd5a349119 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -130,8 +130,8 @@ #define EVENT_TYPE_INV_PPR_REQ 0x9 #define EVENT_DEVID_MASK 0xffff #define EVENT_DEVID_SHIFT 0 -#define EVENT_DOMID_MASK 0xffff -#define EVENT_DOMID_SHIFT 0 +#define EVENT_DOMID_MASK_LO 0xffff +#define EVENT_DOMID_MASK_HI 0xf0000 #define EVENT_FLAGS_MASK 0xfff #define EVENT_FLAGS_SHIFT 0x10 diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b18aac4c105e..7c503a6bc585 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -812,6 +812,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, return 0; out_clear_smmu: + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); smmu_domain->smmu = NULL; out_unlock: mutex_unlock(&smmu_domain->init_mutex); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4c91359057c5..ca51036aa53c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -166,6 +166,9 @@ #define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2) #define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4) +#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL +#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -1015,27 +1018,56 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) static struct io_pgtable * arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; + struct arm_lpae_io_pgtable *data; - if 
(cfg->ias != 48 || cfg->oas > 40) + /* No quirks for Mali (hopefully) */ + if (cfg->quirks) + return NULL; + + if (cfg->ias > 48 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - u64 mair, ttbr; - /* Copy values as union fields overlap */ - mair = cfg->arm_lpae_s1_cfg.mair[0]; - ttbr = cfg->arm_lpae_s1_cfg.ttbr[0]; + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; - cfg->arm_mali_lpae_cfg.memattr = mair; - cfg->arm_mali_lpae_cfg.transtab = ttbr | - ARM_MALI_LPAE_TTBR_READ_INNER | - ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + /* Mali seems to need a full 4-level table regardless of IAS */ + if (data->levels < ARM_LPAE_MAX_LEVELS) { + data->levels = ARM_LPAE_MAX_LEVELS; + data->pgd_size = sizeof(arm_lpae_iopte); } + /* + * MEMATTR: Mali has no actual notion of a non-cacheable type, so the + * best we can do is mimic the out-of-tree driver and hope that the + * "implementation-defined caching policy" is good enough. Similarly, + * we'll use it for the sake of a valid attribute for our 'device' + * index, although callers should never request that in practice. + */ + cfg->arm_mali_lpae_cfg.memattr = + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_MALI_LPAE_MEMATTR_IMP_DEF + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); - return iop; + data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg); + if (!data->pgd) + goto out_free_data; + + /* Ensure the empty pgd is visible before TRANSTAB can be written */ + wmb(); + + cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) | + ARM_MALI_LPAE_TTBR_READ_INNER | + ARM_MALI_LPAE_TTBR_ADRMODE_TABLE; + return &data->iop; + +out_free_data: + kfree(data); + return NULL; } struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9da8309f7170..237103465b82 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1086,8 +1086,6 @@ static int ipmmu_probe(struct platform_device *pdev) mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts); - irq = platform_get_irq(pdev, 0); - /* * Determine if this IPMMU instance is a root device by checking for * the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property. 
@@ -1106,6 +1104,7 @@ static int ipmmu_probe(struct platform_device *pdev) /* Root devices have mandatory IRQs */ if (ipmmu_is_root(mmu)) { + irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "no IRQ found\n"); return irq; diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 26290f310f90..4dcbf68dfda4 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -100,6 +100,7 @@ struct rk_iommu { struct device *dev; void __iomem **bases; int num_mmu; + int num_irq; struct clk_bulk_data *clocks; int num_clocks; bool reset_disabled; @@ -1136,7 +1137,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct rk_iommu *iommu; struct resource *res; int num_res = pdev->num_resources; - int err, i, irq; + int err, i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -1163,6 +1164,10 @@ static int rk_iommu_probe(struct platform_device *pdev) if (iommu->num_mmu == 0) return PTR_ERR(iommu->bases[0]); + iommu->num_irq = platform_irq_count(pdev); + if (iommu->num_irq < 0) + return iommu->num_irq; + iommu->reset_disabled = device_property_read_bool(dev, "rockchip,disable-mmu-reset"); @@ -1219,8 +1224,9 @@ static int rk_iommu_probe(struct platform_device *pdev) pm_runtime_enable(dev); - i = 0; - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) { + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); + if (irq < 0) return irq; @@ -1245,10 +1251,13 @@ err_unprepare_clocks: static void rk_iommu_shutdown(struct platform_device *pdev) { struct rk_iommu *iommu = platform_get_drvdata(pdev); - int i = 0, irq; + int i; + + for (i = 0; i < iommu->num_irq; i++) { + int irq = platform_get_irq(pdev, i); - while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) devm_free_irq(iommu->dev, irq, iommu); + } pm_runtime_force_suspend(&pdev->dev); } diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c index 1a57cee3efab..0b0a73739756 100644 --- a/drivers/irqchip/irq-al-fic.c +++ b/drivers/irqchip/irq-al-fic.c @@ -15,6 +15,7 @@ /* FIC Registers */ #define AL_FIC_CAUSE 0x00 +#define AL_FIC_SET_CAUSE 0x08 #define AL_FIC_MASK 0x10 #define AL_FIC_CONTROL 0x28 @@ -126,6 +127,16 @@ static void al_fic_irq_handler(struct irq_desc *desc) chained_irq_exit(irqchip, desc); } +static int al_fic_irq_retrigger(struct irq_data *data) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + struct al_fic *fic = gc->private; + + writel_relaxed(BIT(data->hwirq), fic->base + AL_FIC_SET_CAUSE); + + return 1; +} + static int al_fic_register(struct device_node *node, struct al_fic *fic) { @@ -159,6 +170,7 @@ static int al_fic_register(struct device_node *node, gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit; gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit; gc->chip_types->chip.irq_set_type = al_fic_irq_set_type; + gc->chip_types->chip.irq_retrigger = al_fic_irq_retrigger; gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE; gc->private = fic; diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 6acad2ea0fb3..29333497ba10 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -313,6 +313,7 @@ static void __init sama5d3_aic_irq_fixup(void) static const struct of_device_id aic5_irq_fixups[] __initconst = { { .compatible = "atmel,sama5d3", .data = sama5d3_aic_irq_fixup }, { .compatible = "atmel,sama5d4", .data = sama5d3_aic_irq_fixup }, + { .compatible = "microchip,sam9x60", .data = sama5d3_aic_irq_fixup }, { /* sentinel 
*/ }, }; @@ -390,3 +391,12 @@ static int __init sama5d4_aic5_of_init(struct device_node *node, return aic5_of_init(node, parent, NR_SAMA5D4_IRQS); } IRQCHIP_DECLARE(sama5d4_aic5, "atmel,sama5d4-aic", sama5d4_aic5_of_init); + +#define NR_SAM9X60_IRQS 50 + +static int __init sam9x60_aic5_of_init(struct device_node *node, + struct device_node *parent) +{ + return aic5_of_init(node, parent, NR_SAM9X60_IRQS); +} +IRQCHIP_DECLARE(sam9x60_aic5, "microchip,sam9x60-aic", sam9x60_aic5_of_init); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 422664ac5f53..1edc99335a94 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -59,7 +59,7 @@ static struct gic_chip_data gic_data __read_mostly; static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) -#define GIC_LINE_NR max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) +#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) /* diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index c72c036aea76..daefc52b0ec5 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -97,7 +97,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask, } } -static void plic_irq_enable(struct irq_data *d) +static void plic_irq_unmask(struct irq_data *d) { unsigned int cpu = cpumask_any_and(irq_data_get_affinity_mask(d), cpu_online_mask); @@ -106,7 +106,7 @@ static void plic_irq_enable(struct irq_data *d) plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); } -static void plic_irq_disable(struct irq_data *d) +static void plic_irq_mask(struct irq_data *d) { plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); } @@ -125,10 +125,8 @@ static int plic_set_affinity(struct irq_data *d, if (cpu >= nr_cpu_ids) return -EINVAL; - if (!irqd_irq_disabled(d)) { - plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); - plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); - } + plic_irq_toggle(cpu_possible_mask, d->hwirq, 0); + plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1); irq_data_update_effective_affinity(d, cpumask_of(cpu)); @@ -136,14 +134,18 @@ static int plic_set_affinity(struct irq_data *d, } #endif +static void plic_irq_eoi(struct irq_data *d) +{ + struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); +} + static struct irq_chip plic_chip = { .name = "SiFive PLIC", - /* - * There is no need to mask/unmask PLIC interrupts. They are "masked" - * by reading claim and "unmasked" when writing it back. 
- */ - .irq_enable = plic_irq_enable, - .irq_disable = plic_irq_disable, + .irq_mask = plic_irq_mask, + .irq_unmask = plic_irq_unmask, + .irq_eoi = plic_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = plic_set_affinity, #endif @@ -152,7 +154,7 @@ static struct irq_chip plic_chip = { static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { - irq_set_chip_and_handler(irq, &plic_chip, handle_simple_irq); + irq_set_chip_and_handler(irq, &plic_chip, handle_fasteoi_irq); irq_set_chip_data(irq, NULL); irq_set_noprobe(irq); return 0; @@ -188,7 +190,6 @@ static void plic_handle_irq(struct pt_regs *regs) hwirq); else generic_handle_irq(irq); - writel(hwirq, claim); } csr_set(sie, SIE_SEIE); } diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index d249cf8ac277..8346e6d1816c 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -542,7 +542,7 @@ static void wake_migration_worker(struct cache *cache) static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache) { - return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT); + return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO); } static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell) @@ -554,9 +554,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) { struct dm_cache_migration *mg; - mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT); - if (!mg) - return NULL; + mg = mempool_alloc(&cache->migration_pool, GFP_NOIO); memset(mg, 0, sizeof(*mg)); @@ -664,10 +662,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi struct dm_bio_prison_cell_v2 *cell_prealloc, *cell; cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */ - if (!cell_prealloc) { - defer_bio(cache, bio); - return false; - } build_key(oblock, end, &key); r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell); @@ -1493,11 +1487,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg) struct dm_bio_prison_cell_v2 *prealloc; prealloc = alloc_prison_cell(cache); - if (!prealloc) { - DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache)); - mg_complete(mg, false); - return -ENOMEM; - } /* * Prevent writes to the block, but allow reads to continue. 
@@ -1535,11 +1524,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio } mg = alloc_migration(cache); - if (!mg) { - policy_complete_background_work(cache->policy, op, false); - background_work_end(cache); - return -ENOMEM; - } mg->op = op; mg->overwrite_bio = bio; @@ -1628,10 +1612,6 @@ static int invalidate_lock(struct dm_cache_migration *mg) struct dm_bio_prison_cell_v2 *prealloc; prealloc = alloc_prison_cell(cache); - if (!prealloc) { - invalidate_complete(mg, false); - return -ENOMEM; - } build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key); r = dm_cell_lock_v2(cache->prison, &key, @@ -1669,10 +1649,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, return -EPERM; mg = alloc_migration(cache); - if (!mg) { - background_work_end(cache); - return -ENOMEM; - } mg->overwrite_bio = bio; mg->invalidate_cblock = cblock; diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index cd6f9e9fc98e..4ca8f1977222 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -591,8 +591,8 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone, * * NOTE: Must be called with the bucket lock held */ -struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, - unsigned long region_nr) +static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, + unsigned long region_nr) { struct dm_clone_region_hydration *hd; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f150f5c5492b..4fb1a40e68a0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -18,7 +18,6 @@ #include <linux/vmalloc.h> #include <linux/log2.h> #include <linux/dm-kcopyd.h> -#include <linux/semaphore.h> #include "dm.h" @@ -107,8 +106,8 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; - /* Maximum number of in-flight COW jobs. */ - struct semaphore cow_count; + unsigned in_progress; + struct wait_queue_head in_progress_wait; struct dm_kcopyd_client *kcopyd_client; @@ -162,8 +161,8 @@ struct dm_snapshot { */ #define DEFAULT_COW_THRESHOLD 2048 -static int cow_threshold = DEFAULT_COW_THRESHOLD; -module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +static unsigned cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, @@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - sema_init(&s->cow_count, (cow_threshold > 0) ? 
cow_threshold : INT_MAX); + init_waitqueue_head(&s->in_progress_wait); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { @@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. + */ + DECLARE_WAITQUEUE(wait, current); + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; +} + /* * Flush a list of buffers. */ @@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio) } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) generic_make_request(bio); bio = n; @@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) rb_link_node(&pe->out_of_order_node, parent, p); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } - up(&s->cow_count); + account_end_copy(s); } /* @@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - down(&s->cow_count); + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - down(&s->cow_count); + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); @@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context) struct bio *bio = context; struct dm_snapshot *s = bio->bi_private; - up(&s->cow_count); + account_end_copy(s); bio->bi_status = write_err ? 
BLK_STS_IOERR : 0; bio_endio(bio); } @@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, dest.sector = bio->bi_iter.bi_sector; dest.count = s->store->chunk_size; - down(&s->cow_count); + account_start_copy(s); WARN_ON_ONCE(bio->bi_private); bio->bi_private = s; dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); @@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + down_read(&s->lock); dm_exception_table_lock(&lock); @@ -2112,7 +2163,7 @@ redirect_to_origin: if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); - return do_origin(s->origin, bio); + return do_origin(s->origin, bio, false); } out_unlock: @@ -2487,15 +2538,24 @@ next_snapshot: /* * Called on a write from the origin driver. */ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); + return do_origin(o->dev, bio, true); } /* diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f61693e59684..1e772287b1c8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } else { pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", mdname(mddev)); - pr_err("md/raid0: please set raid.default_layout to 1 or 2\n"); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); err = -ENOTSUPP; goto abort; } diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index cfca3c70599b..21f90a887485 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -643,8 +643,7 @@ static int v4l_stk_release(struct file *fp) dev->owner = NULL; } - if (is_present(dev)) - usb_autopm_put_interface(dev->interface); + usb_autopm_put_interface(dev->interface); mutex_unlock(&dev->lock); return v4l2_fh_release(fp); } diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 32747425297d..64fff6abe60e 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -941,7 +941,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev, if (!cnt) { rc = -ENODEV; pci_dev_busy = 1; - goto err_out; + goto err_out_int; } jm = kzalloc(sizeof(struct jmb38x_ms) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 47ae84afac2e..1b1a794d639d 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -527,6 +527,7 @@ static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf, FASTRPC_PHYS(buffer->phys), buffer->size); if (ret < 0) { dev_err(buffer->dev, "failed to get scatterlist from DMA API\n"); + kfree(a); return -EINVAL; } diff --git 
a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 32e9b1aed2ca..0a2b99e1af45 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -218,13 +218,21 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) { int ret; + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported && + !cldev->bus->hbm_f_os_supported) + return; + ret = mei_cldev_enable(cldev); if (ret) return; - ret = mei_fwver(cldev); - if (ret < 0) - dev_err(&cldev->dev, "FW version command failed %d\n", ret); + if (cldev->bus->fw_f_fw_ver_supported) { + ret = mei_fwver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "FW version command failed %d\n", + ret); + } if (cldev->bus->hbm_f_os_supported) { ret = mei_osver(cldev); diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index c681f6fab342..93027fd96c71 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -27,18 +27,6 @@ #include "mei_hdcp.h" -static inline u8 mei_get_ddi_index(enum port port) -{ - switch (port) { - case PORT_A: - return MEI_DDI_A; - case PORT_B ... PORT_F: - return (u8)port; - default: - return MEI_DDI_INVALID_PORT; - } -} - /** * mei_hdcp_initiate_session() - Initiate a Wired HDCP2.2 Tx Session in ME FW * @dev: device corresponding to the mei_cl_device @@ -69,7 +57,8 @@ mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_INITIATE_HDCP2_SESSION_IN; session_init_in.port.integrated_port_type = data->port_type; - session_init_in.port.physical_port = mei_get_ddi_index(data->port); + session_init_in.port.physical_port = (u8)data->fw_ddi; + session_init_in.port.attached_transcoder = (u8)data->fw_tc; session_init_in.protocol = data->protocol; byte = mei_cldev_send(cldev, (u8 *)&session_init_in, @@ -138,7 +127,8 @@ mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_RECEIVER_CERT_IN; verify_rxcert_in.port.integrated_port_type = data->port_type; - verify_rxcert_in.port.physical_port = mei_get_ddi_index(data->port); + verify_rxcert_in.port.physical_port = (u8)data->fw_ddi; + verify_rxcert_in.port.attached_transcoder = (u8)data->fw_tc; verify_rxcert_in.cert_rx = rx_cert->cert_rx; memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN); @@ -208,7 +198,8 @@ mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data, send_hprime_in.header.buffer_len = WIRED_CMD_BUF_LEN_AKE_SEND_HPRIME_IN; send_hprime_in.port.integrated_port_type = data->port_type; - send_hprime_in.port.physical_port = mei_get_ddi_index(data->port); + send_hprime_in.port.physical_port = (u8)data->fw_ddi; + send_hprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(send_hprime_in.h_prime, rx_hprime->h_prime, HDCP_2_2_H_PRIME_LEN); @@ -265,7 +256,8 @@ mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_SEND_PAIRING_INFO_IN; pairing_info_in.port.integrated_port_type = data->port_type; - pairing_info_in.port.physical_port = mei_get_ddi_index(data->port); + pairing_info_in.port.physical_port = (u8)data->fw_ddi; + pairing_info_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km, HDCP_2_2_E_KH_KM_LEN); @@ -323,7 +315,8 @@ mei_hdcp_initiate_locality_check(struct device *dev, lc_init_in.header.buffer_len = WIRED_CMD_BUF_LEN_INIT_LOCALITY_CHECK_IN; lc_init_in.port.integrated_port_type = data->port_type; - lc_init_in.port.physical_port = mei_get_ddi_index(data->port); + 
lc_init_in.port.physical_port = (u8)data->fw_ddi; + lc_init_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in)); if (byte < 0) { @@ -378,7 +371,8 @@ mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data, WIRED_CMD_BUF_LEN_VALIDATE_LOCALITY_IN; verify_lprime_in.port.integrated_port_type = data->port_type; - verify_lprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_lprime_in.port.physical_port = (u8)data->fw_ddi; + verify_lprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime, HDCP_2_2_L_PRIME_LEN); @@ -435,7 +429,8 @@ static int mei_hdcp_get_session_key(struct device *dev, get_skey_in.header.buffer_len = WIRED_CMD_BUF_LEN_GET_SESSION_KEY_IN; get_skey_in.port.integrated_port_type = data->port_type; - get_skey_in.port.physical_port = mei_get_ddi_index(data->port); + get_skey_in.port.physical_port = (u8)data->fw_ddi; + get_skey_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in)); if (byte < 0) { @@ -499,7 +494,8 @@ mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev, WIRED_CMD_BUF_LEN_VERIFY_REPEATER_IN; verify_repeater_in.port.integrated_port_type = data->port_type; - verify_repeater_in.port.physical_port = mei_get_ddi_index(data->port); + verify_repeater_in.port.physical_port = (u8)data->fw_ddi; + verify_repeater_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_repeater_in.rx_info, rep_topology->rx_info, HDCP_2_2_RXINFO_LEN); @@ -569,7 +565,8 @@ static int mei_hdcp_verify_mprime(struct device *dev, WIRED_CMD_BUF_LEN_REPEATER_AUTH_STREAM_REQ_MIN_IN; verify_mprime_in.port.integrated_port_type = data->port_type; - verify_mprime_in.port.physical_port = mei_get_ddi_index(data->port); + verify_mprime_in.port.physical_port = (u8)data->fw_ddi; + verify_mprime_in.port.attached_transcoder = (u8)data->fw_tc; memcpy(verify_mprime_in.m_prime, stream_ready->m_prime, HDCP_2_2_MPRIME_LEN); @@ -630,7 +627,8 @@ static int mei_hdcp_enable_authentication(struct device *dev, enable_auth_in.header.buffer_len = WIRED_CMD_BUF_LEN_ENABLE_AUTH_IN; enable_auth_in.port.integrated_port_type = data->port_type; - enable_auth_in.port.physical_port = mei_get_ddi_index(data->port); + enable_auth_in.port.physical_port = (u8)data->fw_ddi; + enable_auth_in.port.attached_transcoder = (u8)data->fw_tc; enable_auth_in.stream_type = data->streams[0].stream_type; byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in, @@ -684,7 +682,8 @@ mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data) WIRED_CMD_BUF_LEN_CLOSE_SESSION_IN; session_close_in.port.integrated_port_type = data->port_type; - session_close_in.port.physical_port = mei_get_ddi_index(data->port); + session_close_in.port.physical_port = (u8)data->fw_ddi; + session_close_in.port.attached_transcoder = (u8)data->fw_tc; byte = mei_cldev_send(cldev, (u8 *)&session_close_in, sizeof(session_close_in)); diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h index e4b1cd54c853..18ffc773fa18 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.h +++ b/drivers/misc/mei/hdcp/mei_hdcp.h @@ -184,8 +184,11 @@ struct hdcp_cmd_no_data { /* Uniquely identifies the hdcp port being addressed for a given command. */ struct hdcp_port_id { u8 integrated_port_type; + /* physical_port is used until Gen11.5. Must be zero for Gen11.5+ */ u8 physical_port; - u16 reserved; + /* attached_transcoder is for Gen11.5+. 
Set to zero for <Gen11.5 */ + u8 attached_transcoder; + u8 reserved; } __packed; /* @@ -362,16 +365,4 @@ struct wired_cmd_repeater_auth_stream_req_out { struct hdcp_cmd_header header; struct hdcp_port_id port; } __packed; - -enum mei_fw_ddi { - MEI_DDI_INVALID_PORT = 0x0, - - MEI_DDI_B = 1, - MEI_DDI_C, - MEI_DDI_D, - MEI_DDI_E, - MEI_DDI_F, - MEI_DDI_A = 7, - MEI_DDI_RANGE_END = MEI_DDI_A, -}; #endif /* __MEI_HDCP_H__ */ diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 77f7dff7098d..c09f8bb49495 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -79,6 +79,9 @@ #define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ #define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ +#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ +#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ #define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index abe1b1f4362f..c4f6991d3028 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1355,6 +1355,8 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) #define MEI_CFG_FW_SPS \ .quirk_probe = mei_me_fw_type_sps +#define MEI_CFG_FW_VER_SUPP \ + .fw_ver_supported = 1 #define MEI_CFG_ICH_HFS \ .fw_status.count = 0 @@ -1392,31 +1394,41 @@ static const struct mei_cfg mei_me_ich10_cfg = { MEI_CFG_ICH10_HFS, }; -/* PCH devices */ -static const struct mei_cfg mei_me_pch_cfg = { +/* PCH6 devices */ +static const struct mei_cfg mei_me_pch6_cfg = { MEI_CFG_PCH_HFS, }; +/* PCH7 devices */ +static const struct mei_cfg mei_me_pch7_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, +}; + /* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */ static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = { MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_FW_NM, }; /* PCH8 Lynx Point and newer devices */ static const struct mei_cfg mei_me_pch8_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, }; /* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ static const struct mei_cfg mei_me_pch8_sps_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_FW_SPS, }; /* Cannon Lake and newer devices */ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, MEI_CFG_DMA_128, }; @@ -1428,7 +1440,8 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_UNDEF_CFG] = NULL, [MEI_ME_ICH_CFG] = &mei_me_ich_cfg, [MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg, - [MEI_ME_PCH_CFG] = &mei_me_pch_cfg, + [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg, + [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg, [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg, [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, @@ -1473,6 +1486,8 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev, mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); hw->cfg = cfg; + dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; + return dev; } diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 08c84a0de4a8..1d8794828cbc 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -20,11 +20,13 @@ * @fw_status: FW status * @quirk_probe: device exclusion quirk * @dma_size: device DMA buffers size + * @fw_ver_supported: is fw version retrievable from FW */ struct mei_cfg { const struct mei_fw_status fw_status; bool (*quirk_probe)(struct pci_dev *pdev); size_t dma_size[DMA_DSCR_NUM]; + u32 fw_ver_supported:1; }; @@ 
-62,7 +64,8 @@ struct mei_me_hw { * @MEI_ME_UNDEF_CFG: Lower sentinel. * @MEI_ME_ICH_CFG: I/O Controller Hub legacy devices. * @MEI_ME_ICH10_CFG: I/O Controller Hub platforms Gen10 - * @MEI_ME_PCH_CFG: Platform Controller Hub platforms (Up to Gen8). + * @MEI_ME_PCH6_CFG: Platform Controller Hub platforms (Gen6). + * @MEI_ME_PCH7_CFG: Platform Controller Hub platforms (Gen7). * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations * with quirk for Node Manager exclusion. * @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer @@ -77,7 +80,8 @@ enum mei_cfg_idx { MEI_ME_UNDEF_CFG, MEI_ME_ICH_CFG, MEI_ME_ICH10_CFG, - MEI_ME_PCH_CFG, + MEI_ME_PCH6_CFG, + MEI_ME_PCH7_CFG, MEI_ME_PCH_CPT_PBG_CFG, MEI_ME_PCH8_CFG, MEI_ME_PCH8_SPS_CFG, diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index f71a023aed3c..0f2141178299 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -426,6 +426,8 @@ struct mei_fw_version { * * @fw_ver : FW versions * + * @fw_f_fw_ver_supported : fw feature: fw version supported + * * @me_clients_rwsem: rw lock over me_clients list * @me_clients : list of FW clients * @me_clients_map : FW clients bit map @@ -506,6 +508,8 @@ struct mei_device { struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS]; + unsigned int fw_f_fw_ver_supported:1; + struct rw_semaphore me_clients_rwsem; struct list_head me_clients; DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX); diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d5a92c6eadb3..3dca63eddaa0 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -61,13 +61,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, @@ -96,6 +96,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index d4ada5cca2d1..234551a68739 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -646,8 +646,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, struct tmio_mmc_dma *dma_priv; struct tmio_mmc_host *host; struct renesas_sdhi *priv; + int num_irqs, irq, ret, i; struct resource *res; - int irq, ret, i; u16 ver; of_data = of_device_get_match_data(&pdev->dev); @@ 
-825,24 +825,31 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->hs400_complete = renesas_sdhi_hs400_complete; } - i = 0; - while (1) { + num_irqs = platform_irq_count(pdev); + if (num_irqs < 0) { + ret = num_irqs; + goto eirq; + } + + /* There must be at least one IRQ source */ + if (!num_irqs) { + ret = -ENXIO; + goto eirq; + } + + for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); - if (irq < 0) - break; - i++; + if (irq < 0) { + ret = irq; + goto eirq; + } + ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) goto eirq; } - /* There must be at least one IRQ source */ - if (!i) { - ret = irq; - goto eirq; - } - dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n", mmc_hostname(host->mmc), (unsigned long) (platform_get_resource(pdev, IORESOURCE_MEM, 0)->start), diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 2b9cdcd1dd9d..f4f5f0a70cda 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -262,6 +262,7 @@ static const struct sdhci_iproc_data bcm2835_data = { }; static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = { + .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .ops = &sdhci_iproc_32only_ops, }; diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 81bd9afb0980..98c575de43c7 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -1393,11 +1393,9 @@ static int sh_mmcif_probe(struct platform_device *pdev) const char *name; irq[0] = platform_get_irq(pdev, 0); - irq[1] = platform_get_irq(pdev, 1); - if (irq[0] < 0) { - dev_err(dev, "Get irq error\n"); + irq[1] = platform_get_irq_optional(pdev, 1); + if (irq[0] < 0) return -ENXIO; - } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(dev, res); diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c index 97a97a9ccc36..e10b76089048 100644 --- a/drivers/mtd/nand/raw/au1550nd.c +++ b/drivers/mtd/nand/raw/au1550nd.c @@ -134,16 +134,15 @@ static void au_write_buf16(struct nand_chip *this, const u_char *buf, int len) /** * au_read_buf16 - read chip data into buffer - * @mtd: MTD device structure + * @this: NAND chip object * @buf: buffer to store date * @len: number of bytes to read * * read function for 16bit buswidth */ -static void au_read_buf16(struct mtd_info *mtd, u_char *buf, int len) +static void au_read_buf16(struct nand_chip *this, u_char *buf, int len) { int i; - struct nand_chip *this = mtd_to_nand(mtd); u16 *p = (u16 *) buf; len >>= 1; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1d8621d43160..7acf4a93b592 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -487,7 +487,7 @@ static int write_sr(struct spi_nor *nor, u8 val) SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1), SPI_MEM_OP_NO_ADDR, SPI_MEM_OP_NO_DUMMY, - SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1)); + SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1)); return spi_mem_exec_op(nor->spimem, &op); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 931d9d935686..21d8fcc83c9c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4039,7 +4039,7 @@ out: * this to-be-skipped slave to send a packet out. 
*/ old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; idx < old_arr->count; idx++) { + for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { if (skipslave == old_arr->arr[idx]) { old_arr->arr[idx] = old_arr->arr[old_arr->count-1]; diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 526ba2ab66f1..cc3536315eff 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1845,7 +1845,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, loc = B53_EG_MIR_CTL; b53_read16(dev, B53_MGMT_PAGE, loc, ®); - reg &= ~MIRROR_MASK; reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, loc, reg); diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index a23d3ffdf0c4..24a5e99f7fd5 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1224,10 +1224,6 @@ static int ksz8795_switch_init(struct ksz_device *dev) { int i; - mutex_init(&dev->stats_mutex); - mutex_init(&dev->alu_mutex); - mutex_init(&dev->vlan_mutex); - dev->ds->ops = &ksz8795_switch_ops; for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) { diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index d0f8153e86b7..8b00f8e6c02f 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -25,6 +25,7 @@ KSZ_REGMAP_TABLE(ksz8795, 16, SPI_ADDR_SHIFT, static int ksz8795_spi_probe(struct spi_device *spi) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -33,9 +34,9 @@ static int ksz8795_spi_probe(struct spi_device *spi) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_spi(spi, - &ksz8795_regmap_config - [i]); + rc = ksz8795_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_spi(spi, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&spi->dev, diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 0b1e01f0873d..fdffd9e0c518 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -17,6 +17,7 @@ KSZ_REGMAP_TABLE(ksz9477, not_used, 16, 0, 0); static int ksz9477_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *i2c_id) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -25,8 +26,9 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c, return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_i2c(i2c, - &ksz9477_regmap_config[i]); + rc = ksz9477_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&i2c->dev, diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 2938e892b631..16939f29faa5 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * +/* SPDX-License-Identifier: GPL-2.0 */ +/* * Microchip KSZ9477 register definitions * * Copyright (C) 2017-2018 Microchip Technology Inc. 
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index f4198d6f72be..c5f64959a184 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -24,6 +24,7 @@ KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT, static int ksz9477_spi_probe(struct spi_device *spi) { + struct regmap_config rc; struct ksz_device *dev; int i, ret; @@ -32,8 +33,9 @@ static int ksz9477_spi_probe(struct spi_device *spi) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) { - dev->regmap[i] = devm_regmap_init_spi(spi, - &ksz9477_regmap_config[i]); + rc = ksz9477_regmap_config[i]; + rc.lock_arg = &dev->regmap_mutex; + dev->regmap[i] = devm_regmap_init_spi(spi, &rc); if (IS_ERR(dev->regmap[i])) { ret = PTR_ERR(dev->regmap[i]); dev_err(&spi->dev, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b0b870f0c252..fe47180c908b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,7 +436,7 @@ int ksz_switch_register(struct ksz_device *dev, } mutex_init(&dev->dev_mutex); - mutex_init(&dev->stats_mutex); + mutex_init(&dev->regmap_mutex); mutex_init(&dev->alu_mutex); mutex_init(&dev->vlan_mutex); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index dd60d0837fc6..a20ebb749377 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Microchip switch driver common header +/* SPDX-License-Identifier: GPL-2.0 */ +/* Microchip switch driver common header * * Copyright (C) 2017-2019 Microchip Technology Inc. */ @@ -47,7 +47,7 @@ struct ksz_device { const char *name; struct mutex dev_mutex; /* device access */ - struct mutex stats_mutex; /* status access */ + struct mutex regmap_mutex; /* regmap access */ struct mutex alu_mutex; /* ALU access */ struct mutex vlan_mutex; /* vlan access */ const struct ksz_dev_ops *dev_ops; @@ -290,6 +290,18 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data); } +static inline void ksz_regmap_lock(void *__mtx) +{ + struct mutex *mtx = __mtx; + mutex_lock(mtx); +} + +static inline void ksz_regmap_unlock(void *__mtx) +{ + struct mutex *mtx = __mtx; + mutex_unlock(mtx); +} + /* Regmap tables generation */ #define KSZ_SPI_OP_RD 3 #define KSZ_SPI_OP_WR 2 @@ -314,6 +326,8 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, .write_flag_mask = \ KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp, \ regbits, regpad), \ + .lock = ksz_regmap_lock, \ + .unlock = ksz_regmap_unlock, \ .reg_format_endian = REGMAP_ENDIAN_BIG, \ .val_format_endian = REGMAP_ENDIAN_BIG \ } diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 684aa51684db..b00274caae4f 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -705,7 +705,7 @@ qca8k_setup(struct dsa_switch *ds) BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); /* Setup connection between CPU port & user ports */ - for (i = 0; i < DSA_MAX_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), @@ -1077,7 +1077,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); 
+ priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS); if (!priv->ds) return -ENOMEM; diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index ca3d17e43ed8..ac88caca5ad4 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -339,10 +339,12 @@ int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct realtek_smi *smi = ds->priv; + u16 vid; int ret; - if (!smi->ops->is_vlan_valid(smi, port)) - return -EINVAL; + for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) + if (!smi->ops->is_vlan_valid(smi, vid)) + return -EINVAL; dev_info(smi->dev, "prepare VLANs %04x..%04x\n", vlan->vid_begin, vlan->vid_end); @@ -370,8 +372,9 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port, u16 vid; int ret; - if (!smi->ops->is_vlan_valid(smi, port)) - return; + for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) + if (!smi->ops->is_vlan_valid(smi, vid)) + return; dev_info(smi->dev, "add VLAN on port %d, %s, %s\n", port, diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index a268085ffad2..f5cc8b0a7c74 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -507,7 +507,8 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) irq = of_irq_get(intc, 0); if (irq <= 0) { dev_err(smi->dev, "failed to get parent IRQ\n"); - return irq ? irq : -EINVAL; + ret = irq ? irq : -EINVAL; + goto out_put_node; } /* This clears the IRQ status register */ @@ -515,7 +516,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) &val); if (ret) { dev_err(smi->dev, "can't read interrupt status\n"); - return ret; + goto out_put_node; } /* Fetch IRQ edge information from the descriptor */ @@ -537,7 +538,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) val); if (ret) { dev_err(smi->dev, "could not configure IRQ polarity\n"); - return ret; + goto out_put_node; } ret = devm_request_threaded_irq(smi->dev, irq, NULL, @@ -545,7 +546,7 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) "RTL8366RB", smi); if (ret) { dev_err(smi->dev, "unable to request irq: %d\n", ret); - return ret; + goto out_put_node; } smi->irqdomain = irq_domain_add_linear(intc, RTL8366RB_NUM_INTERRUPT, @@ -553,12 +554,15 @@ static int rtl8366rb_setup_cascaded_irq(struct realtek_smi *smi) smi); if (!smi->irqdomain) { dev_err(smi->dev, "failed to create IRQ domain\n"); - return -EINVAL; + ret = -EINVAL; + goto out_put_node; } for (i = 0; i < smi->num_ports; i++) irq_set_parent(irq_create_mapping(smi->irqdomain, i), irq); - return 0; +out_put_node: + of_node_put(intc); + return ret; } static int rtl8366rb_set_addr(struct realtek_smi *smi) diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index e53e494c22e0..fbb564c3beb8 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _SJA1105_H diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h index 740dadf43f01..1fc0d13dc623 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir 
Oltean <olteanv@gmail.com> +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _SJA1105_DYNAMIC_CONFIG_H #define _SJA1105_DYNAMIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b9def744bcb3..7687ddcae159 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1897,7 +1897,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds, return sja1105_static_config_reload(priv); } -/* Caller must hold priv->tagger_data.meta_lock */ +/* Must be called only with priv->tagger_data.state bit + * SJA1105_HWTS_RX_EN cleared + */ static int sja1105_change_rxtstamping(struct sja1105_private *priv, bool on) { @@ -1954,16 +1956,17 @@ static int sja1105_hwtstamp_set(struct dsa_switch *ds, int port, break; } - if (rx_on != priv->tagger_data.hwts_rx_en) { - spin_lock(&priv->tagger_data.meta_lock); + if (rx_on != test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) { + clear_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); + rc = sja1105_change_rxtstamping(priv, rx_on); - spin_unlock(&priv->tagger_data.meta_lock); if (rc < 0) { dev_err(ds->dev, "Failed to change RX timestamping: %d\n", rc); - return -EFAULT; + return rc; } - priv->tagger_data.hwts_rx_en = rx_on; + if (rx_on) + set_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state); } if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) @@ -1982,7 +1985,7 @@ static int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; - if (priv->tagger_data.hwts_rx_en) + if (test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state)) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else config.rx_filter = HWTSTAMP_FILTER_NONE; @@ -2005,12 +2008,12 @@ static void sja1105_rxtstamp_work(struct work_struct *work) mutex_lock(&priv->ptp_lock); - now = priv->tstamp_cc.read(&priv->tstamp_cc); - while ((skb = skb_dequeue(&data->skb_rxtstamp_queue)) != NULL) { struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb); u64 ts; + now = priv->tstamp_cc.read(&priv->tstamp_cc); + *shwt = (struct skb_shared_hwtstamps) {0}; ts = SJA1105_SKB_CB(skb)->meta_tstamp; @@ -2031,7 +2034,7 @@ static bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port, struct sja1105_private *priv = ds->priv; struct sja1105_tagger_data *data = &priv->tagger_data; - if (!data->hwts_rx_en) + if (!test_bit(SJA1105_HWTS_RX_EN, &data->state)) return false; /* We need to read the full PTP clock to reconstruct the Rx @@ -2201,6 +2204,7 @@ static int sja1105_probe(struct spi_device *spi) tagger_data = &priv->tagger_data; skb_queue_head_init(&tagger_data->skb_rxtstamp_queue); INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work); + spin_lock_init(&tagger_data->meta_lock); /* Connections between dsa_port and sja1105_port */ for (i = 0; i < SJA1105_NUM_PORTS; i++) { diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index af456b0a4d27..394e12a6ad59 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _SJA1105_PTP_H #define _SJA1105_PTP_H diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c index 84dc603138cf..58dd37ecde17 100644 --- 
a/drivers/net/dsa/sja1105/sja1105_spi.c +++ b/drivers/net/dsa/sja1105/sja1105_spi.c @@ -409,7 +409,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = static_config_buf_prepare_for_upload(priv, config_buf, buf_len); if (rc < 0) { dev_err(dev, "Invalid config, cannot upload\n"); - return -EINVAL; + rc = -EINVAL; + goto out; } /* Prevent PHY jabbering during switch reset by inhibiting * Tx on all ports and waiting for current packet to drain. @@ -418,7 +419,8 @@ int sja1105_static_config_upload(struct sja1105_private *priv) rc = sja1105_inhibit_tx(priv, port_bitmap, true); if (rc < 0) { dev_err(dev, "Failed to inhibit Tx on ports\n"); - return -ENXIO; + rc = -ENXIO; + goto out; } /* Wait for an eventual egress packet to finish transmission * (reach IFG). It is guaranteed that a second one will not diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h index 7f87022a2d61..f4a5c5c04311 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.h +++ b/drivers/net/dsa/sja1105/sja1105_static_config.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright (c) 2016-2018, NXP Semiconductors +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright (c) 2016-2018, NXP Semiconductors * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _SJA1105_STATIC_CONFIG_H diff --git a/drivers/net/dsa/sja1105/sja1105_tas.h b/drivers/net/dsa/sja1105/sja1105_tas.h index 0b803c30e640..0aad212d88b2 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.h +++ b/drivers/net/dsa/sja1105/sja1105_tas.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com> */ #ifndef _SJA1105_TAS_H #define _SJA1105_TAS_H diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index b4a0fb281e69..bb65dd39f847 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -194,9 +194,7 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); - aq_nic_set_packet_filter(aq_nic, ndev->flags); - - aq_nic_set_multicast_list(aq_nic, ndev); + (void)aq_nic_set_multicast_list(aq_nic, ndev); } static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 8f66e7817811..137c1de4c6ec 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -631,9 +631,12 @@ err_exit: int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) { - unsigned int packet_filter = self->packet_filter; + const struct aq_hw_ops *hw_ops = self->aq_hw_ops; + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + unsigned int packet_filter = ndev->flags; struct netdev_hw_addr *ha = NULL; unsigned int i = 0U; + int err = 0; self->mc_list.count = 0; if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { @@ -641,29 +644,28 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev) } else { netdev_for_each_uc_addr(ha, ndev) { ether_addr_copy(self->mc_list.ar[i++], ha->addr); - - if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) - break; } } - if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { - packet_filter |= IFF_ALLMULTI; - } 
else { - netdev_for_each_mc_addr(ha, ndev) { - ether_addr_copy(self->mc_list.ar[i++], ha->addr); - - if (i >= AQ_HW_MULTICAST_ADDRESS_MAX) - break; + cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST); + if (cfg->is_mc_list_enabled) { + if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) { + packet_filter |= IFF_ALLMULTI; + } else { + netdev_for_each_mc_addr(ha, ndev) { + ether_addr_copy(self->mc_list.ar[i++], + ha->addr); + } } } if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) { - packet_filter |= IFF_MULTICAST; self->mc_list.count = i; - self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, - self->mc_list.ar, - self->mc_list.count); + err = hw_ops->hw_multicast_list_set(self->aq_hw, + self->mc_list.ar, + self->mc_list.count); + if (err < 0) + return err; } return aq_nic_set_packet_filter(self, packet_filter); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 3901d7994ca1..76bdbe1596d6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -313,6 +313,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, break; buff->is_error |= buff_->is_error; + buff->is_cso_err |= buff_->is_cso_err; } while (!buff_->is_eop); @@ -320,7 +321,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error) { + if (buff->is_error || buff->is_cso_err) { buff_ = buff; do { next_ = buff_->next, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 30f7fc4c97ff..2ad3fa6316ce 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -818,14 +818,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, cfg->is_vlan_force_promisc); hw_atl_rpfl2multicast_flr_en_set(self, - IS_FILTER_ENABLED(IFF_ALLMULTI), 0); + IS_FILTER_ENABLED(IFF_ALLMULTI) && + IS_FILTER_ENABLED(IFF_MULTICAST), 0); hw_atl_rpfl2_accept_all_mc_packets_set(self, - IS_FILTER_ENABLED(IFF_ALLMULTI)); + IS_FILTER_ENABLED(IFF_ALLMULTI) && + IS_FILTER_ENABLED(IFF_MULTICAST)); hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST)); - cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST); for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i) hw_atl_rpfl2_uc_flr_en_set(self, @@ -968,14 +969,26 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self) static int hw_atl_b0_hw_stop(struct aq_hw_s *self) { + int err; + u32 val; + hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK); /* Invalidate Descriptor Cache to prevent writing to the cached * descriptors and to the data pointer of those descriptors */ - hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1); + hw_atl_rdm_rx_dma_desc_cache_init_tgl(self); - return aq_hw_err_from_flags(self); + err = aq_hw_err_from_flags(self); + + if (err) + goto err_exit; + + readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get, + self, val, val == 1, 1000U, 10000U); + +err_exit: + return err; } static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 1149812ae463..6f340695e6bd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -606,12 +606,25 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s 
*aq_hw, u32 rx_flow_ctl_mode HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode); } -void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init) +void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw) { + u32 val; + + val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, + HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT); + aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT, - init); + val ^ 1); +} + +u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw) +{ + return aq_hw_read_reg_bit(aq_hw, RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR, + RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK, + RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT); } void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 0c37abbabca5..c3ee278c3747 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -313,8 +313,11 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_pkt_buff_size_per_tc, u32 buffer); -/* set rdm rx dma descriptor cache init */ -void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init); +/* toggle rdm rx dma descriptor cache init */ +void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw); + +/* get rdm rx dma descriptor cache init done */ +u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw); /* set rx xoff enable (per tc) */ void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index c3febcdfa92e..35887ad89025 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -318,6 +318,25 @@ /* default value of bitfield rdm_desc_init_i */ #define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0 +/* rdm_desc_init_done_i bitfield definitions + * preprocessor definitions for the bitfield rdm_desc_init_done_i. + * port="pif_rdm_desc_init_done_i" + */ + +/* register address for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10 +/* bitmask for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U +/* inverted bitmask for bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe +/* lower bit position of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U +/* width of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1 +/* default value of bitfield rdm_desc_init_done_i */ +#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0 + + /* rx int_desc_wrb_en bitfield definitions * preprocessor definitions for the bitfield "int_desc_wrb_en". 
* port="pif_rdm_int_desc_wrb_en_i" diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index da726489e3c8..7bc51f8d6f2f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -337,7 +337,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp) /* Convert PHY temperature from 1/256 degree Celsius * to 1/1000 degree Celsius. */ - *temp = temp_res * 1000 / 256; + *temp = (temp_res & 0xFFFF) * 1000 / 256; return 0; } diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 7548247455d7..1b1a09095c0d 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -526,7 +526,7 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) struct device *dev = &ag->pdev->dev; struct net_device *ndev = ag->ndev; static struct mii_bus *mii_bus; - struct device_node *np; + struct device_node *np, *mnp; int err; np = dev->of_node; @@ -571,7 +571,9 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) msleep(200); } - err = of_mdiobus_register(mii_bus, np); + mnp = of_get_child_by_name(np, "mdio"); + err = of_mdiobus_register(mii_bus, mnp); + of_node_put(mnp); if (err) goto mdio_err_put_clk; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index e24f5d2b6afe..53055ce5dfd6 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,7 +8,6 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC - select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. @@ -69,6 +68,7 @@ config BCMGENET select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC + select DIMLIB help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. @@ -188,6 +188,7 @@ config SYSTEMPORT select MII select PHYLIB select FIXED_PHY + select DIMLIB help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset using an internal @@ -200,6 +201,7 @@ config BNXT select LIBCRC32C select NET_DEVLINK select PAGE_POOL + select DIMLIB ---help--- This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit Ethernet cards. 
To compile this driver as a module, choose M here: diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 12cb77ef1081..0f138280315a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2018,6 +2018,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) */ if (priv->internal_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + int0_enable |= UMAC_IRQ_PHY_DET_R; } else if (priv->ext_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { @@ -2611,11 +2613,14 @@ static void bcmgenet_irq_task(struct work_struct *work) priv->irq0_stat = 0; spin_unlock_irq(&priv->lock); + if (status & UMAC_IRQ_PHY_DET_R && + priv->dev->phydev->autoneg != AUTONEG_ENABLE) + phy_init_hw(priv->dev->phydev); + /* Link UP/DOWN event */ - if (status & UMAC_IRQ_LINK_EVENT) { - priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP); + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->dev->phydev); - } + } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2710,7 +2715,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } /* all other interested interrupts handled in bottom half */ - status &= UMAC_IRQ_LINK_EVENT; + status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R); if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); @@ -2874,6 +2879,12 @@ static int bcmgenet_open(struct net_device *dev) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + ret = bcmgenet_mii_connect(dev); + if (ret) { + netdev_err(dev, "failed to connect to PHY\n"); + goto err_clk_disable; + } + /* take MAC out of reset */ bcmgenet_umac_reset(priv); @@ -2883,6 +2894,12 @@ static int bcmgenet_open(struct net_device *dev) reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); + ret = bcmgenet_mii_config(dev, true); + if (ret) { + netdev_err(dev, "unsupported PHY\n"); + goto err_disconnect_phy; + } + bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { @@ -2898,7 +2915,7 @@ static int bcmgenet_open(struct net_device *dev) ret = bcmgenet_init_dma(priv); if (ret) { netdev_err(dev, "failed to initialize DMA\n"); - goto err_clk_disable; + goto err_disconnect_phy; } /* Always enable ring 16 - descriptor ring */ @@ -2921,25 +2938,19 @@ static int bcmgenet_open(struct net_device *dev) goto err_irq0; } - ret = bcmgenet_mii_probe(dev); - if (ret) { - netdev_err(dev, "failed to connect to PHY\n"); - goto err_irq1; - } - bcmgenet_netif_start(dev); netif_tx_start_all_queues(dev); return 0; -err_irq1: - free_irq(priv->irq1, priv); err_irq0: free_irq(priv->irq0, priv); err_fini_dma: bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); +err_disconnect_phy: + phy_disconnect(dev->phydev); err_clk_disable: if (priv->internal_phy) bcmgenet_power_down(priv, GENET_POWER_PASSIVE); @@ -3620,6 +3631,8 @@ static int bcmgenet_resume(struct device *d) if (priv->internal_phy) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + phy_init_hw(dev->phydev); + bcmgenet_umac_reset(priv); init_umac(priv); @@ -3628,8 +3641,6 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(dev->phydev); - /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h 
b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 4a8fc03d82fd..7fbf573d8d52 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -366,6 +366,7 @@ struct bcmgenet_mib_counters { #define EXT_PWR_DOWN_PHY_EN (1 << 20) #define EXT_RGMII_OOB_CTRL 0x0C +#define RGMII_MODE_EN_V123 (1 << 0) #define RGMII_LINK (1 << 4) #define OOB_DISABLE (1 << 5) #define RGMII_MODE_EN (1 << 6) @@ -719,8 +720,8 @@ GENET_IO_MACRO(rbuf, GENET_RBUF_OFF); /* MDIO routines */ int bcmgenet_mii_init(struct net_device *dev); +int bcmgenet_mii_connect(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); -int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 970e478a9017..17bb8d60a157 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -173,6 +173,46 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_fixed_phy_link_update); } +int bcmgenet_mii_connect(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + struct device_node *dn = priv->pdev->dev.of_node; + struct phy_device *phydev; + u32 phy_flags = 0; + int ret; + + /* Communicate the integrated PHY revision */ + if (priv->internal_phy) + phy_flags = priv->gphy_rev; + + /* Initialize link state variables that bcmgenet_mii_setup() uses */ + priv->old_link = -1; + priv->old_speed = -1; + priv->old_duplex = -1; + priv->old_pause = -1; + + if (dn) { + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = dev->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } + + return 0; +} + int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -258,74 +298,29 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg |= RGMII_MODE_EN | id_mode_dis; + reg |= id_mode_dis; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + reg |= RGMII_MODE_EN_V123; + else + reg |= RGMII_MODE_EN; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } - if (init) - dev_info(kdev, "configuring instance for %s\n", phy_name); - - return 0; -} - -int bcmgenet_mii_probe(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - struct device_node *dn = priv->pdev->dev.of_node; - struct phy_device *phydev; - u32 phy_flags; - int ret; - - /* Communicate the integrated PHY revision */ - phy_flags = priv->gphy_rev; - - /* Initialize link state variables that bcmgenet_mii_setup() uses */ - priv->old_link = -1; - priv->old_speed = -1; - priv->old_duplex = -1; - priv->old_pause = -1; - - if (dn) { - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } else { - phydev = dev->phydev; - phydev->dev_flags = phy_flags; + if (init) { + 
linkmode_copy(phydev->advertising, phydev->supported); - ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, - priv->phy_interface); - if (ret) { - pr_err("could not attach to PHY\n"); - return -ENODEV; - } - } + /* The internal PHY has its link interrupts routed to the + * Ethernet MAC ISRs. On GENETv5 there is a hardware issue + * that prevents the signaling of link UP interrupts when + * the link operates at 10Mbps, so fallback to polling for + * those versions of GENET. + */ + if (priv->internal_phy && !GENET_IS_V5(priv)) + phydev->irq = PHY_IGNORE_INTERRUPT; - /* Configure port multiplexer based on what the probed PHY device since - * reading the 'max-speed' property determines the maximum supported - * PHY speed which is needed for bcmgenet_mii_config() to configure - * things appropriately. - */ - ret = bcmgenet_mii_config(dev, true); - if (ret) { - phy_disconnect(dev->phydev); - return ret; + dev_info(kdev, "configuring instance for %s\n", phy_name); } - linkmode_copy(phydev->advertising, phydev->supported); - - /* The internal PHY has its link interrupts routed to the - * Ethernet MAC ISRs. On GENETv5 there is a hardware issue - * that prevents the signaling of link UP interrupts when - * the link operates at 10Mbps, so fallback to polling for - * those versions of GENET. - */ - if (priv->internal_phy && !GENET_IS_V5(priv)) - dev->phydev->irq = PHY_IGNORE_INTERRUPT; - return 0; } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8e8d557901a9..1e1b774e1953 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3405,17 +3405,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, return err; } - *tx_clk = devm_clk_get(&pdev->dev, "tx_clk"); + *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk"); if (IS_ERR(*tx_clk)) - *tx_clk = NULL; + return PTR_ERR(*tx_clk); - *rx_clk = devm_clk_get(&pdev->dev, "rx_clk"); + *rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk"); if (IS_ERR(*rx_clk)) - *rx_clk = NULL; + return PTR_ERR(*rx_clk); - *tsu_clk = devm_clk_get(&pdev->dev, "tsu_clk"); + *tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk"); if (IS_ERR(*tsu_clk)) - *tsu_clk = NULL; + return PTR_ERR(*tsu_clk); err = clk_prepare_enable(*pclk); if (err) { diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.h b/drivers/net/ethernet/cavium/common/cavium_ptp.h index be2bafc7beeb..a04eccbc78e8 100644 --- a/drivers/net/ethernet/cavium/common/cavium_ptp.h +++ b/drivers/net/ethernet/cavium/common/cavium_ptp.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* cavium_ptp.h - PTP 1588 clock on Cavium hardware * Copyright (c) 2003-2015, 2017 Cavium, Inc. 
*/ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 5b602243d573..a4dead4ab0ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -137,13 +137,12 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, static int alloc_uld_rxqs(struct adapter *adap, struct sge_uld_rxq_info *rxq_info, bool lro) { - struct sge *s = &adap->sge; unsigned int nq = rxq_info->nrxq + rxq_info->nciq; + int i, err, msi_idx, que_idx = 0, bmap_idx = 0; struct sge_ofld_rxq *q = rxq_info->uldrxq; unsigned short *ids = rxq_info->rspq_id; - unsigned int bmap_idx = 0; + struct sge *s = &adap->sge; unsigned int per_chan; - int i, err, msi_idx, que_idx = 0; per_chan = rxq_info->nrxq / adap->params.nports; @@ -161,6 +160,10 @@ static int alloc_uld_rxqs(struct adapter *adap, if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); + if (bmap_idx < 0) { + err = -ENOSPC; + goto freeout; + } msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 162d7d8fb295..19379bae0144 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1235,6 +1235,8 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable) priv->rx_td_enabled = enable; } +static void update_tx_fqids(struct dpaa2_eth_priv *priv); + static int link_state_update(struct dpaa2_eth_priv *priv) { struct dpni_link_state state = {0}; @@ -1261,6 +1263,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv) goto out; if (state.up) { + update_tx_fqids(priv); netif_carrier_on(priv->net_dev); netif_tx_start_all_queues(priv->net_dev); } else { @@ -2533,6 +2536,47 @@ static int set_pause(struct dpaa2_eth_priv *priv) return 0; } +static void update_tx_fqids(struct dpaa2_eth_priv *priv) +{ + struct dpni_queue_id qid = {0}; + struct dpaa2_eth_fq *fq; + struct dpni_queue queue; + int i, j, err; + + /* We only use Tx FQIDs for FQID-based enqueue, so check + * if DPNI version supports it before updating FQIDs + */ + if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR, + DPNI_ENQUEUE_FQID_VER_MINOR) < 0) + return; + + for (i = 0; i < priv->num_fqs; i++) { + fq = &priv->fq[i]; + if (fq->type != DPAA2_TX_CONF_FQ) + continue; + for (j = 0; j < dpaa2_eth_tc_count(priv); j++) { + err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_TX, j, fq->flowid, + &queue, &qid); + if (err) + goto out_err; + + fq->tx_fqid[j] = qid.fqid; + if (fq->tx_fqid[j] == 0) + goto out_err; + } + } + + priv->enqueue = dpaa2_eth_enqueue_fq; + + return; + +out_err: + netdev_info(priv->net_dev, + "Error reading Tx FQID, fallback to QDID-based enqueue\n"); + priv->enqueue = dpaa2_eth_enqueue_qd; +} + /* Configure the DPNI object this interface is associated with */ static int setup_dpni(struct fsl_mc_device *ls_dev) { @@ -3306,6 +3350,9 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg) if (status & DPNI_IRQ_EVENT_LINK_CHANGED) link_state_update(netdev_priv(net_dev)); + if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED) + set_mac_addr(netdev_priv(net_dev)); + return IRQ_HANDLED; } @@ -3331,7 +3378,8 @@ static int setup_irqs(struct fsl_mc_device *ls_dev) } err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle, - DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED); + DPNI_IRQ_INDEX, 
DPNI_IRQ_EVENT_LINK_CHANGED | + DPNI_IRQ_EVENT_ENDPOINT_CHANGED); if (err < 0) { dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err); goto free_irq; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index fd583911b6c0..ee0711d06b3a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -133,9 +133,12 @@ int dpni_reset(struct fsl_mc_io *mc_io, */ #define DPNI_IRQ_INDEX 0 /** - * IRQ event - indicates a change in link state + * IRQ events: + * indicates a change in link state + * indicates a change in endpoint */ #define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001 +#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED 0x00000002 int dpni_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index c4b7bf851a28..75ccc1e7076b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -32,6 +32,8 @@ #define HNAE3_MOD_VERSION "1.0" +#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */ + /* Device IDs */ #define HNAE3_DEV_ID_GE 0xA220 #define HNAE3_DEV_ID_25GE 0xA221 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index fd7f94372ff0..e02e01bd9eff 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -906,6 +906,9 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + /* nic's msix numbers is always equals to the roce's. */ + hdev->num_nic_msi = hdev->num_roce_msi; + /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. 
*/ @@ -915,6 +918,15 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->num_msi = hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); + + hdev->num_nic_msi = hdev->num_msi; + } + + if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "Just %u msi resources, not enough for pf(min:2).\n", + hdev->num_nic_msi); + return -EINVAL; } return 0; @@ -1507,6 +1519,10 @@ static int hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps) kinfo->rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / hdev->tm_info.num_tc); + /* ensure one to one mapping between irq and queue at default */ + kinfo->rss_size = min_t(u16, kinfo->rss_size, + (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc); + return 0; } @@ -2285,7 +2301,8 @@ static int hclge_init_msi(struct hclge_dev *hdev) int vectors; int i; - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, + hdev->num_msi, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { dev_err(&pdev->dev, @@ -2300,6 +2317,7 @@ static int hclge_init_msi(struct hclge_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + hdev->roce_base_msix_offset; @@ -3903,6 +3921,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, int alloc = 0; int i, j; + vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num); for (j = 0; j < vector_num; j++) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 3e9574a9e22d..c3d56b872ed7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -763,6 +763,7 @@ struct hclge_dev { u32 base_msi_vector; u16 *vector_status; int *vector_irq; + u16 num_nic_msi; /* Num of nic vectors for this PF */ u16 num_roce_msi; /* Num of roce vectors for this PF */ int roce_base_vector; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 9f0e35f27789..62399cc1c5a6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -537,9 +537,16 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + /* if user not set rss, the rss_size should compare with the + * valid msi numbers to ensure one to one map between tqp and + * irq as default. + */ + if (!kinfo->req_rss_size) + max_rss_size = min_t(u16, max_rss_size, + (hdev->num_nic_msi - 1) / + kinfo->num_tc); + /* Set to the maximum specification value (max_rss_size). 
*/ - dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", - kinfo->rss_size, max_rss_size); kinfo->rss_size = max_rss_size; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index e3090b3dab1d..7d7e712691b9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) kinfo->tqp[i] = &hdev->htqp[i].q; } + /* after init the max rss_size and tqps, adjust the default tqp numbers + * and rss size with the actual vector numbers + */ + kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps); + kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc, + kinfo->rss_size); + return 0; } @@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hnae3_handle *handle, u16 vector_num, int alloc = 0; int i, j; + vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num); vector_num = min(hdev->num_msi_left, vector_num); for (j = 0; j < vector_num; j++) { @@ -2246,13 +2254,14 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) int vectors; int i; - if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) + if (hnae3_dev_roce_supported(hdev)) vectors = pci_alloc_irq_vectors(pdev, hdev->roce_base_msix_offset + 1, hdev->num_msi, PCI_IRQ_MSIX); else - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM, + hdev->num_msi, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (vectors < 0) { @@ -2268,6 +2277,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev) hdev->num_msi = vectors; hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset; @@ -2533,7 +2543,7 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) req = (struct hclgevf_query_res_cmd *)desc.data; - if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) { + if (hnae3_dev_roce_supported(hdev)) { hdev->roce_base_msix_offset = hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee), HCLGEVF_MSIX_OFT_ROCEE_M, @@ -2542,6 +2552,9 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); + /* nic's msix numbers is always equals to the roce's. */ + hdev->num_nic_msix = hdev->num_roce_msix; + /* VF should have NIC vectors and Roce vectors, NIC vectors * are queued before Roce vectors. The offset is fixed to 64. 
*/ @@ -2551,6 +2564,15 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev) hdev->num_msi = hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number), HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S); + + hdev->num_nic_msix = hdev->num_msi; + } + + if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) { + dev_err(&hdev->pdev->dev, + "Just %u msi resources, not enough for vf(min:2).\n", + hdev->num_nic_msix); + return -EINVAL; } return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index bdde3afc286b..2b8d6bc6d224 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -270,6 +270,7 @@ struct hclgevf_dev { u16 num_msi; u16 num_msi_left; u16 num_msi_used; + u16 num_nic_msix; /* Num of nic vectors for this VF */ u16 num_roce_msix; /* Num of roce vectors for this VF */ u16 roce_base_msix_offset; int roce_base_vector; diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 3e863a71c513..7df5d7d211d4 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -148,11 +148,15 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev, { u32 time_cnt; u32 reg_value; + int ret; regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val); for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) { - regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value); + ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, &reg_value); + if (ret) + return ret; + reg_value &= st_msk; if ((!!check_st) == (!!reg_value)) break; diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index 211c5f74b4c8..aec7e98bcc85 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,6 +96,8 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ +#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT + #define DMA_WBACK(ndev, addr, len) \ do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) @@ -200,7 +202,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) unregister_netdev (dev); dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index 1274ad24d6af..f9742af7f142 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev) dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), &lp->dma_addr, GFP_KERNEL, - DMA_ATTR_NON_CONSISTENT); + LIB82596_DMA_ATTR); if (!dma) { printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); return -ENOMEM; } @@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev) i = register_netdev(dev); if (i) { dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma, lp->dma_addr, LIB82596_DMA_ATTR); return i; } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 6eb6c2ff7f09..6436a98c5953 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -24,6 +24,8 @@ static const char sni_82596_string[] = "snirm_82596"; +#define
LIB82596_DMA_ATTR 0 + #define DMA_WBACK(priv, addr, len) do { } while (0) #define DMA_INV(priv, addr, len) do { } while (0) #define DMA_WBACK_INV(priv, addr, len) do { } while (0) @@ -152,7 +154,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev) unregister_netdev(dev); dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2b073a3c0b84..f59d9a8e35e2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2878,12 +2878,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, if (test_bit(0, &adapter->resetting) && adapter->reset_reason == VNIC_RESET_MOBILITY) { - u64 val = (0xff000000) | scrq->hw_irq; + struct irq_desc *desc = irq_to_desc(scrq->irq); + struct irq_chip *chip = irq_desc_get_chip(desc); - rc = plpar_hcall_norets(H_EOI, val); - if (rc) - dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", - val, rc); + chip->irq_eoi(&desc->irq_data); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c61069340f4f..703adb96429e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -261,6 +261,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, ge_mode = 0; switch (state->interface) { case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: ge_mode = 1; break; case PHY_INTERFACE_MODE_REVMII: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9231b39d18b2..c501bf2a0252 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct xarray *mkeys = &dev->priv.mkey_table; - struct mlx5_core_mkey *deleted_mkey; unsigned long flags; xa_lock_irqsave(mkeys, flags); - deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); xa_unlock_irqrestore(mkeys, flags); - if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", - mlx5_base_mkey(mkey->key)); - return -ENOENT; - } MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index 913f1e5aaaf2..d7c7467e2d53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -137,7 +137,8 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool, icm_mr->icm_start_addr = icm_mr->dm.addr; - align_diff = icm_mr->icm_start_addr % align_base; + /* align_base is always a power of 2 */ + align_diff = icm_mr->icm_start_addr & (align_base - 1); if (align_diff) icm_mr->used_length = align_base - align_diff; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 4187f2b112b8..e8b656075c6f 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -788,12 +788,10 @@ again: * it means that all the previous stes are the same, * if so, this rule is duplicated. */ - if (mlx5dr_ste_is_last_in_rule(nic_matcher, - matched_ste->ste_chain_location)) { - mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n"); - return NULL; - } - return matched_ste; + if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location)) + return matched_ste; + + mlx5dr_dbg(dmn, "Duplicate rule inserted\n"); } if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 899450b28621..7c03b661ae7e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -99,6 +99,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, devlink = priv_to_devlink(mlxsw_sp->core); in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core, local_port); + skb_push(skb, ETH_HLEN); devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port); consume_skb(skb); } diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index b063eb78fa0c..aac115136720 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -388,13 +388,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev) continue; phy = of_phy_find_device(phy_node); + of_node_put(phy_node); if (!phy) continue; err = ocelot_probe_port(ocelot, port, regs, phy); if (err) { of_node_put(portnp); - return err; + goto out_put_ports; } phy_mode = of_get_phy_mode(portnp); @@ -422,7 +423,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) "invalid phy mode for port%d, (Q)SGMII only\n", port); of_node_put(portnp); - return -EINVAL; + err = -EINVAL; + goto out_put_ports; } serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); @@ -435,7 +437,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) "missing SerDes phys for port%d\n", port); - goto err_probe_ports; + of_node_put(portnp); + goto out_put_ports; } ocelot->ports[port]->serdes = serdes; @@ -447,9 +450,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Ocelot switch probed\n"); - return 0; - -err_probe_ports: +out_put_ports: + of_node_put(ports); return err; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 141571e2ec11..544012a67221 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1356,9 +1356,6 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); - /* Reset the ethernet controller */ - __lpc_eth_reset(pldat); - /* then shut everything down to save power */ __lpc_eth_shutdown(pldat); diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index bd0583e409df..d25b88f53de4 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI + select NET_DEVLINK help This enables the support for the Pensando family of Ethernet adapters. 
More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 812190e729c2..6a95b42a8d8c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -182,6 +182,8 @@ struct ionic_lif { #define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq) #define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq) +#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx) +#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx) #define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q) #define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index e2907884f843..03916b6d47f2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -117,7 +117,8 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) /* rx stats */ total += MAX_Q(lif) * IONIC_NUM_RX_STATS; - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { /* tx debug stats */ total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_TX_Q_STATS + @@ -149,7 +150,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) *buf += ETH_GSTRING_LEN; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { snprintf(*buf, ETH_GSTRING_LEN, "txq_%d_%s", @@ -187,7 +189,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) *buf += ETH_GSTRING_LEN; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { snprintf(*buf, ETH_GSTRING_LEN, "rxq_%d_cq_%s", @@ -223,6 +226,8 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) { struct ionic_lif_sw_stats lif_stats; struct ionic_qcq *txqcq, *rxqcq; + struct ionic_tx_stats *txstats; + struct ionic_rx_stats *rxstats; int i, q_num; ionic_get_lif_stats(lif, &lif_stats); @@ -233,15 +238,17 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txqcq = lif_to_txqcq(lif, q_num); + txstats = &lif_to_txstats(lif, q_num); for (i = 0; i < IONIC_NUM_TX_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->stats->tx, + **buf = IONIC_READ_STAT64(txstats, &ionic_tx_stats_desc[i]); (*buf)++; } - if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + txqcq = lif_to_txqcq(lif, q_num); for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { **buf = IONIC_READ_STAT64(&txqcq->q, &ionic_txq_stats_desc[i]); @@ -258,22 +265,24 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) (*buf)++; } for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { - **buf = txqcq->stats->tx.sg_cntr[i]; + **buf = txstats->sg_cntr[i]; (*buf)++; } } } for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - rxqcq = lif_to_rxqcq(lif, q_num); + rxstats = &lif_to_rxstats(lif, q_num); for (i = 0; i < IONIC_NUM_RX_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->stats->rx, + **buf = IONIC_READ_STAT64(rxstats, &ionic_rx_stats_desc[i]); (*buf)++; } - 
if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + if (test_bit(IONIC_LIF_UP, lif->state) && + test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) { + rxqcq = lif_to_rxqcq(lif, q_num); for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { **buf = IONIC_READ_STAT64(&rxqcq->cq, &ionic_dbg_cq_stats_desc[i]); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 457444894d80..b4b8ba00ee01 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2787,6 +2787,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", err); + dev_kfree_skb_irq(skb); ql_free_large_buffers(qdev); return -ENOMEM; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 74f81fe03810..350b0d949611 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4146,6 +4146,14 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) rtl_lock_config_regs(tp); } +static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu) +{ + if (mtu > ETH_DATA_LEN) + rtl_hw_jumbo_enable(tp); + else + rtl_hw_jumbo_disable(tp); +} + DECLARE_RTL_COND(rtl_chipcmd_cond) { return RTL_R8(tp, ChipCmd) & CmdReset; @@ -4442,11 +4450,6 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp, static void rtl_hw_start_8168bb(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - - if (tp->dev->mtu <= ETH_DATA_LEN) { - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B | - PCI_EXP_DEVCTL_NOSNOOP_EN); - } } static void rtl_hw_start_8168bef(struct rtl8169_private *tp) @@ -4462,9 +4465,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_disable_clock_request(tp); } @@ -4490,9 +4490,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) rtl_set_def_aspm_entry_latency(tp); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) @@ -4503,9 +4500,6 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) /* Magic. */ RTL_W8(tp, DBG_REG, 0x20); - - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); } static void rtl_hw_start_8168c_1(struct rtl8169_private *tp) @@ -4611,9 +4605,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_1); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_disable_clock_request(tp); /* Reset tx FIFO pointer */ @@ -4636,9 +4627,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168e_2); - if (tp->dev->mtu <= ETH_DATA_LEN) - rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06); @@ -5485,6 +5473,8 @@ static void rtl_hw_start(struct rtl8169_private *tp) rtl_set_rx_tx_desc_registers(tp); rtl_lock_config_regs(tp); + rtl_jumbo_config(tp, tp->dev->mtu); + /* Initially a 10 us delay. Turned it into a PCI commit. 
- FR */ RTL_R16(tp, CPlusCmd); RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); @@ -5498,10 +5488,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) { struct rtl8169_private *tp = netdev_priv(dev); - if (new_mtu > ETH_DATA_LEN) - rtl_hw_jumbo_enable(tp); - else - rtl_hw_jumbo_disable(tp); + rtl_jumbo_config(tp, new_mtu); dev->mtu = new_mtu; netdev_update_features(dev); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 55db7fbd43cc..f9e6744d8fd6 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -282,7 +282,6 @@ struct netsec_desc_ring { void *vaddr; u16 head, tail; u16 xdp_xmit; /* netsec_xdp_xmit packets */ - bool is_xdp; struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; spinlock_t lock; /* XDP tx queue locking */ @@ -634,8 +633,7 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) unsigned int bytes; int cnt = 0; - if (dring->is_xdp) - spin_lock(&dring->lock); + spin_lock(&dring->lock); bytes = 0; entry = dring->vaddr + DESC_SZ * tail; @@ -682,8 +680,8 @@ next: entry = dring->vaddr + DESC_SZ * tail; cnt++; } - if (dring->is_xdp) - spin_unlock(&dring->lock); + + spin_unlock(&dring->lock); if (!cnt) return false; @@ -799,9 +797,6 @@ static void netsec_set_tx_de(struct netsec_priv *priv, de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; de->attr = attr; - /* under spin_lock if using XDP */ - if (!dring->is_xdp) - dma_wmb(); dring->desc[idx] = *desc; if (desc->buf_type == TYPE_NETSEC_SKB) @@ -1123,12 +1118,10 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, u16 tso_seg_len = 0; int filled; - if (dring->is_xdp) - spin_lock_bh(&dring->lock); + spin_lock_bh(&dring->lock); filled = netsec_desc_used(dring); if (netsec_check_stop_tx(priv, filled)) { - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); net_warn_ratelimited("%s %s Tx queue full\n", dev_name(priv->dev), ndev->name); return NETDEV_TX_BUSY; @@ -1161,8 +1154,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, tx_desc.dma_addr = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) { - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); netif_err(priv, drv, priv->ndev, "%s: DMA mapping failed\n", __func__); ndev->stats.tx_dropped++; @@ -1177,8 +1169,7 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, netdev_sent_queue(priv->ndev, skb->len); netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb); - if (dring->is_xdp) - spin_unlock_bh(&dring->lock); + spin_unlock_bh(&dring->lock); netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */ return NETDEV_TX_OK; @@ -1262,7 +1253,6 @@ err: static void netsec_setup_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; - struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); int i; for (i = 0; i < DESC_NUM; i++) { @@ -1275,12 +1265,6 @@ static void netsec_setup_tx_dring(struct netsec_priv *priv) */ de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; } - - if (xdp_prog) - dring->is_xdp = true; - else - dring->is_xdp = false; - } static int netsec_setup_rx_dring(struct netsec_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index f97a4096f8fc..ddcc191febdb 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -651,7 +651,8 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw, } } } else { - netdev_info(dev, "Too many address, switching to promiscuous\n"); + if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL)) + netdev_info(dev, "Too many address, switching to promiscuous\n"); v = EMAC_FRM_FLT_RXALL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 9b4b5f69fc02..5a7b0aca1d31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -401,8 +401,11 @@ static void dwmac4_set_filter(struct mac_device_info *hw, int numhashregs = (hw->multicast_filter_bins >> 5); int mcbitslog2 = hw->mcast_bits_log2; unsigned int value; + u32 mc_filter[8]; int i; + memset(mc_filter, 0, sizeof(mc_filter)); + value = readl(ioaddr + GMAC_PACKET_FILTER); value &= ~GMAC_PACKET_FILTER_HMC; value &= ~GMAC_PACKET_FILTER_HPF; @@ -416,16 +419,13 @@ static void dwmac4_set_filter(struct mac_device_info *hw, /* Pass all multi */ value |= GMAC_PACKET_FILTER_PM; /* Set all the bits of the HASH tab */ - for (i = 0; i < numhashregs; i++) - writel(0xffffffff, ioaddr + GMAC_HASH_TAB(i)); + memset(mc_filter, 0xff, sizeof(mc_filter)); } else if (!netdev_mc_empty(dev)) { struct netdev_hw_addr *ha; - u32 mc_filter[8]; /* Hash filter for multicast */ value |= GMAC_PACKET_FILTER_HMC; - memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { /* The upper n bits of the calculated CRC are used to * index the contents of the hash table. The number of @@ -440,14 +440,15 @@ static void dwmac4_set_filter(struct mac_device_info *hw, */ mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f)); } - for (i = 0; i < numhashregs; i++) - writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); } + for (i = 0; i < numhashregs; i++) + writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); + value |= GMAC_PACKET_FILTER_HPF; /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { /* Switch to promiscuous mode if more than 128 addrs * are required */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 3f4f3132e16b..e436fa160c7d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -515,6 +515,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, if (!enable) { val |= PPSCMDx(index, 0x5); + val |= PPSEN0; writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 5923ca62d793..99037386080a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -84,7 +84,7 @@ #define XGMAC_TSIE BIT(12) #define XGMAC_LPIIE BIT(5) #define XGMAC_PMTIE BIT(4) -#define XGMAC_INT_DEFAULT_EN (XGMAC_LPIIE | XGMAC_PMTIE | XGMAC_TSIE) +#define XGMAC_INT_DEFAULT_EN (XGMAC_LPIIE | XGMAC_PMTIE) #define XGMAC_Qx_TX_FLOW_CTRL(x) (0x00000070 + (x) * 4) #define XGMAC_PT GENMASK(31, 16) #define XGMAC_PT_SHIFT 16 @@ -122,6 +122,7 @@ #define XGMAC_HWFEAT_GMIISEL BIT(1) #define XGMAC_HW_FEATURE1 0x00000120 #define XGMAC_HWFEAT_L3L4FNUM GENMASK(30, 27) +#define XGMAC_HWFEAT_HASHTBLSZ GENMASK(25, 24) #define XGMAC_HWFEAT_RSSEN BIT(20) #define 
XGMAC_HWFEAT_TSOEN BIT(18) #define XGMAC_HWFEAT_SPHEN BIT(17) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 2b277b2c586b..5031398e612c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -472,7 +472,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, dwxgmac2_set_mchash(ioaddr, mc_filter, mcbitslog2); /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > XGMAC_ADDR_MAX) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { value |= XGMAC_FILTER_PR; } else { struct netdev_hw_addr *ha; @@ -523,8 +523,8 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, struct stmmac_rss *cfg, u32 num_rxq) { void __iomem *ioaddr = hw->pcsr; + u32 value, *key; int i, ret; - u32 value; value = readl(ioaddr + XGMAC_RSS_CTRL); if (!cfg || !cfg->enable) { @@ -533,8 +533,9 @@ static int dwxgmac2_rss_configure(struct mac_device_info *hw, return 0; } - for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) { - ret = dwxgmac2_rss_write_reg(ioaddr, true, i, cfg->key[i]); + key = (u32 *)cfg->key; + for (i = 0; i < (ARRAY_SIZE(cfg->key) / sizeof(u32)); i++) { + ret = dwxgmac2_rss_write_reg(ioaddr, true, i, key[i]); if (ret) return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 53c4a40d8386..965cbe3e6f51 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -380,6 +380,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); dma_cap->l3l4fnum = (hw_cap & XGMAC_HWFEAT_L3L4FNUM) >> 27; + dma_cap->hash_tb_sz = (hw_cap & XGMAC_HWFEAT_HASHTBLSZ) >> 24; dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20; dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18; dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d3232738fb25..3dfd04e0506a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -629,6 +629,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + ts_event_en = PTP_TCR_TSEVNTENA; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; ptp_over_ethernet = PTP_TCR_TSIPENA; @@ -2609,7 +2610,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) } if (priv->hw->pcs) - stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0); + stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0); /* set TX and RX rings length */ stmmac_set_rings_length(priv); @@ -4715,11 +4716,9 @@ int stmmac_suspend(struct device *dev) if (!ndev || !netif_running(ndev)) return 0; - mutex_lock(&priv->lock); + phylink_mac_change(priv->phylink, false); - rtnl_lock(); - phylink_stop(priv->phylink); - rtnl_unlock(); + mutex_lock(&priv->lock); netif_device_detach(ndev); stmmac_stop_all_queues(priv); @@ -4734,11 +4733,19 @@ int stmmac_suspend(struct device *dev) stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { + mutex_unlock(&priv->lock); + rtnl_lock(); + phylink_stop(priv->phylink); + rtnl_unlock(); + mutex_lock(&priv->lock); + stmmac_mac_set(priv, 
priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - clk_disable(priv->plat->pclk); - clk_disable(priv->plat->stmmac_clk); + if (priv->plat->clk_ptp_ref) + clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->pclk); + clk_disable_unprepare(priv->plat->stmmac_clk); } mutex_unlock(&priv->lock); @@ -4801,8 +4808,10 @@ int stmmac_resume(struct device *dev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk previously disabled */ - clk_enable(priv->plat->stmmac_clk); - clk_enable(priv->plat->pclk); + clk_prepare_enable(priv->plat->stmmac_clk); + clk_prepare_enable(priv->plat->pclk); + if (priv->plat->clk_ptp_ref) + clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); @@ -4824,12 +4833,16 @@ int stmmac_resume(struct device *dev) stmmac_start_all_queues(priv); - rtnl_lock(); - phylink_start(priv->phylink); - rtnl_unlock(); - mutex_unlock(&priv->lock); + if (!device_may_wakeup(priv->device)) { + rtnl_lock(); + phylink_start(priv->phylink); + rtnl_unlock(); + } + + phylink_mac_change(priv->phylink, true); + return 0; } EXPORT_SYMBOL_GPL(stmmac_resume); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 173493db038c..df638b18b72c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -164,7 +164,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, /* structure describing a PTP hardware clock */ static struct ptp_clock_info stmmac_ptp_clock_ops = { .owner = THIS_MODULE, - .name = "stmmac_ptp_clock", + .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, .n_ext_ts = 0, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c index 5f66f6161629..e4ac3c401432 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -487,8 +487,8 @@ static int stmmac_filter_check(struct stmmac_priv *priv) static int stmmac_test_hfilt(struct stmmac_priv *priv) { - unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd}; - unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb}; + unsigned char gd_addr[ETH_ALEN] = {0x01, 0xee, 0xdd, 0xcc, 0xbb, 0xaa}; + unsigned char bd_addr[ETH_ALEN] = {0x01, 0x01, 0x02, 0x03, 0x04, 0x05}; struct stmmac_packet_attrs attr = { }; int ret; @@ -496,6 +496,9 @@ static int stmmac_test_hfilt(struct stmmac_priv *priv) if (ret) return ret; + if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; + ret = dev_mc_add(priv->dev, gd_addr); if (ret) return ret; @@ -573,6 +576,8 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv) if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (!priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; /* Remove all MC addresses */ __dev_mc_unsync(priv->dev, NULL); @@ -611,6 +616,8 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv) if (stmmac_filter_check(priv)) return -EOPNOTSUPP; + if (!priv->hw->multicast_filter_bins) + return -EOPNOTSUPP; /* Remove all UC addresses */ __dev_uc_unsync(priv->dev, NULL); @@ -1564,10 +1571,6 @@ static int __stmmac_test_jumbo(struct stmmac_priv *priv, u16 queue) struct stmmac_packet_attrs attr = { }; int size = priv->dma_buf_sz; - /* Only XGMAC has SW support for multiple RX descs in same 
packet */ - if (priv->plat->has_xgmac) - size = priv->dev->max_mtu; - attr.dst = priv->dev->dev_addr; attr.max_size = size - ETH_FCS_LEN; attr.queue_mapping = queue; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index e231098061b6..f9a9a9d82233 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -510,7 +510,7 @@ static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv, return NULL; } -struct { +static struct { int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls, struct stmmac_flow_entry *entry); } tc_flow_parsers[] = { diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index a65edd2770e6..37ba708ac781 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -722,7 +722,7 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, * cpdma_chan_split_pool - Splits ctrl pool between all channels. * Has to be called under ctlr lock */ -int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) { int tx_per_ch_desc = 0, rx_per_ch_desc = 0; int free_rx_num = 0, free_tx_num = 0; diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index ceddb424f887..0dd0ba915ab9 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1137,10 +1137,11 @@ static void atusb_disconnect(struct usb_interface *interface) ieee802154_unregister_hw(atusb->hw); + usb_put_dev(atusb->usb_dev); + ieee802154_free_hw(atusb->hw); usb_set_intfdata(interface, NULL); - usb_put_dev(atusb->usb_dev); pr_debug("%s done\n", __func__); } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 11402dc347db..430c93786153 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3145,12 +3145,12 @@ static int ca8210_probe(struct spi_device *spi_device) goto error; } + priv->spi->dev.platform_data = pdata; ret = ca8210_get_platform_data(priv->spi, pdata); if (ret) { dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n"); goto error; } - priv->spi->dev.platform_data = pdata; ret = ca8210_dev_com_init(priv); if (ret) { diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 17f2300e63ee..8dc04e2590b1 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -800,7 +800,7 @@ mcr20a_handle_rx_read_buf_complete(void *context) if (!skb) return; - memcpy(skb_put(skb, len), lp->rx_buf, len); + __skb_put_data(skb, lp->rx_buf, len); ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]); print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1, diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index f61d094746c0..1a251f76d09b 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -241,8 +241,8 @@ static struct pernet_operations nsim_fib_net_ops = { void nsim_fib_exit(void) { - unregister_pernet_subsys(&nsim_fib_net_ops); unregister_fib_notifier(&nsim_fib_nb); + unregister_pernet_subsys(&nsim_fib_net_ops); } int nsim_fib_init(void) @@ -258,6 +258,7 @@ int nsim_fib_init(void) err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent); if (err < 0) { pr_err("Failed to register fib notifier\n"); + unregister_pernet_subsys(&nsim_fib_net_ops); goto err_out; } diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 
2aa7b2e60046..1eb5d4fb8925 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -15,6 +15,15 @@ #include <linux/of_gpio.h> #include <linux/gpio/consumer.h> +#define AT803X_SPECIFIC_STATUS 0x11 +#define AT803X_SS_SPEED_MASK (3 << 14) +#define AT803X_SS_SPEED_1000 (2 << 14) +#define AT803X_SS_SPEED_100 (1 << 14) +#define AT803X_SS_SPEED_10 (0 << 14) +#define AT803X_SS_DUPLEX BIT(13) +#define AT803X_SS_SPEED_DUPLEX_RESOLVED BIT(11) +#define AT803X_SS_MDIX BIT(6) + #define AT803X_INTR_ENABLE 0x12 #define AT803X_INTR_ENABLE_AUTONEG_ERR BIT(15) #define AT803X_INTR_ENABLE_SPEED_CHANGED BIT(14) @@ -357,6 +366,64 @@ static int at803x_aneg_done(struct phy_device *phydev) return aneg_done; } +static int at803x_read_status(struct phy_device *phydev) +{ + int ss, err, old_link = phydev->link; + + /* Update the link, but return if there was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + /* why bother the PHY if nothing can have changed */ + if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) + return 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + err = genphy_read_lpa(phydev); + if (err < 0) + return err; + + /* Read the AT8035 PHY-Specific Status register, which indicates the + * speed and duplex that the PHY is actually using, irrespective of + * whether we are in autoneg mode or not. + */ + ss = phy_read(phydev, AT803X_SPECIFIC_STATUS); + if (ss < 0) + return ss; + + if (ss & AT803X_SS_SPEED_DUPLEX_RESOLVED) { + switch (ss & AT803X_SS_SPEED_MASK) { + case AT803X_SS_SPEED_10: + phydev->speed = SPEED_10; + break; + case AT803X_SS_SPEED_100: + phydev->speed = SPEED_100; + break; + case AT803X_SS_SPEED_1000: + phydev->speed = SPEED_1000; + break; + } + if (ss & AT803X_SS_DUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + if (ss & AT803X_SS_MDIX) + phydev->mdix = ETH_TP_MDI_X; + else + phydev->mdix = ETH_TP_MDI; + } + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) + phy_resolve_aneg_pause(phydev); + + return 0; +} + static struct phy_driver at803x_driver[] = { { /* ATHEROS 8035 */ @@ -370,6 +437,7 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_GBIT_FEATURES */ + .read_status = at803x_read_status, .ack_interrupt = at803x_ack_interrupt, .config_intr = at803x_config_intr, }, { @@ -399,6 +467,7 @@ static struct phy_driver at803x_driver[] = { .suspend = at803x_suspend, .resume = at803x_resume, /* PHY_GBIT_FEATURES */ + .read_status = at803x_read_status, .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 8fc33867e524..af8eabe7a6d4 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .name = _name, \ /* PHY_BASIC_FEATURES */ \ .flags = PHY_IS_INTERNAL, \ + .soft_reset = genphy_soft_reset, \ .config_init = bcm7xxx_config_init, \ .suspend = bcm7xxx_suspend, \ .resume = bcm7xxx_config_init, \ diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index e282600bd83e..c1d345c3cab3 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -121,7 +121,7 @@ void mdio_device_reset(struct mdio_device *mdiodev, int value) return; if (mdiodev->reset_gpio) - gpiod_set_value(mdiodev->reset_gpio, value); + 
gpiod_set_value_cansleep(mdiodev->reset_gpio, value); if (mdiodev->reset_ctrl) { if (value) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2fea5541c35a..63dedec0433d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -341,6 +341,35 @@ static int ksz8041_config_aneg(struct phy_device *phydev) return genphy_config_aneg(phydev); } +static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, + const u32 ksz_phy_id) +{ + int ret; + + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + return 0; + + ret = phy_read(phydev, MII_BMSR); + if (ret < 0) + return ret; + + /* KSZ8051 PHY and KSZ8794/KSZ8795/KSZ8765 switch share the same + * exact PHY ID. However, they can be told apart by the extended + * capability registers presence. The KSZ8051 PHY has them while + * the switch does not. + */ + ret &= BMSR_ERCAP; + if (ksz_phy_id == PHY_ID_KSZ8051) + return ret; + else + return !ret; +} + +static int ksz8051_match_phy_device(struct phy_device *phydev) +{ + return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); +} + static int ksz8081_config_init(struct phy_device *phydev) { /* KSZPHY_OMSO_FACTORY_TEST is set at de-assertion of the reset line @@ -364,6 +393,11 @@ static int ksz8061_config_init(struct phy_device *phydev) return kszphy_config_init(phydev); } +static int ksz8795_match_phy_device(struct phy_device *phydev) +{ + return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); +} + static int ksz9021_load_values_from_of(struct phy_device *phydev, const struct device_node *of_node, u16 reg, @@ -1017,8 +1051,6 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ8051, - .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8051", /* PHY_BASIC_FEATURES */ .driver_data = &ksz8051_type, @@ -1029,6 +1061,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, + .match_phy_device = ksz8051_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, }, { @@ -1141,13 +1174,12 @@ static struct phy_driver ksphy_driver[] = { .suspend = genphy_suspend, .resume = genphy_resume, }, { - .phy_id = PHY_ID_KSZ8795, - .phy_id_mask = MICREL_PHY_ID_MASK, - .name = "Micrel KSZ8795", + .name = "Micrel KSZ87XX Switch", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, + .match_phy_device = ksz8795_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, }, { diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 7935593debb1..a1caeee12236 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -323,6 +323,8 @@ int genphy_c45_read_pma(struct phy_device *phydev) { int val; + linkmode_zero(phydev->lp_advertising); + val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1); if (val < 0) return val; diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 369903d9b6ec..9412669b579c 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -283,6 +283,18 @@ void of_set_phy_eee_broken(struct phy_device *phydev) phydev->eee_broken_modes = broken; } +void phy_resolve_aneg_pause(struct phy_device *phydev) +{ + if (phydev->duplex == DUPLEX_FULL) { + phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, + phydev->lp_advertising); + phydev->asym_pause = linkmode_test_bit( + 
ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->lp_advertising); + } +} +EXPORT_SYMBOL_GPL(phy_resolve_aneg_pause); + /** * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings * @phydev: The phy_device struct @@ -305,13 +317,7 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev) break; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->lp_advertising); - phydev->asym_pause = linkmode_test_bit( - ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->lp_advertising); - } + phy_resolve_aneg_pause(phydev); } EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7c92afd36bbe..105d389b58e7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -457,6 +457,11 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) val); change_autoneg = true; break; + case MII_CTRL1000: + mii_ctrl1000_mod_linkmode_adv_t(phydev->advertising, + val); + change_autoneg = true; + break; default: /* do nothing */ break; @@ -567,9 +572,6 @@ int phy_start_aneg(struct phy_device *phydev) if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); - /* Invalidate LP advertising flags */ - linkmode_zero(phydev->lp_advertising); - err = phy_config_aneg(phydev); if (err < 0) goto out_unlock; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index d347ddcac45b..adb66a2fae18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1783,34 +1783,18 @@ done: } EXPORT_SYMBOL(genphy_update_link); -/** - * genphy_read_status - check the link status and update current link state - * @phydev: target phy_device struct - * - * Description: Check the link, then figure out the current state - * by comparing what we advertise with what the link partner - * advertises. Start by checking the gigabit possibilities, - * then move on to 10/100. - */ -int genphy_read_status(struct phy_device *phydev) +int genphy_read_lpa(struct phy_device *phydev) { - int lpa, lpagb, err, old_link = phydev->link; - - /* Update the link, but return if there was an error */ - err = genphy_update_link(phydev); - if (err) - return err; - - /* why bother the PHY if nothing can have changed */ - if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) - return 0; + int lpa, lpagb; - phydev->speed = SPEED_UNKNOWN; - phydev->duplex = DUPLEX_UNKNOWN; - phydev->pause = 0; - phydev->asym_pause = 0; + if (phydev->autoneg == AUTONEG_ENABLE) { + if (!phydev->autoneg_complete) { + mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, + 0); + mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0); + return 0; + } - if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) @@ -1838,6 +1822,46 @@ int genphy_read_status(struct phy_device *phydev) return lpa; mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa); + } else { + linkmode_zero(phydev->lp_advertising); + } + + return 0; +} +EXPORT_SYMBOL(genphy_read_lpa); + +/** + * genphy_read_status - check the link status and update current link state + * @phydev: target phy_device struct + * + * Description: Check the link, then figure out the current state + * by comparing what we advertise with what the link partner + * advertises. Start by checking the gigabit possibilities, + * then move on to 10/100. 
+ */ +int genphy_read_status(struct phy_device *phydev) +{ + int err, old_link = phydev->link; + + /* Update the link, but return if there was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + /* why bother the PHY if nothing can have changed */ + if (phydev->autoneg == AUTONEG_ENABLE && old_link && phydev->link) + return 0; + + phydev->speed = SPEED_UNKNOWN; + phydev->duplex = DUPLEX_UNKNOWN; + phydev->pause = 0; + phydev->asym_pause = 0; + + err = genphy_read_lpa(phydev); + if (err < 0) + return err; + + if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { phy_resolve_aneg_linkmode(phydev); } else if (phydev->autoneg == AUTONEG_DISABLE) { int bmcr = phy_read(phydev, MII_BMCR); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a5a57ca94c1a..20e2ebe458f2 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -576,7 +576,7 @@ static int phylink_register_sfp(struct phylink *pl, /** * phylink_create() - create a phylink instance - * @ndev: a pointer to the &struct net_device + * @config: a pointer to the target &struct phylink_config * @fwnode: a pointer to a &struct fwnode_handle describing the network * interface * @iface: the desired link mode defined by &typedef phy_interface_t diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 734de7de03f7..e1fabb3e3246 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -238,7 +238,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); - nf_reset(skb); + nf_reset_ct(skb); skb->ip_summed = CHECKSUM_NONE; ip_select_ident(net, skb, NULL); @@ -358,7 +358,7 @@ static int pptp_rcv(struct sk_buff *skb) po = lookup_chan(htons(header->call_id), iph->saddr); if (po) { skb_dst_drop(skb); - nf_reset(skb); + nf_reset_ct(skb); return sk_receive_skb(sk_pppox(po), skb, 0); } drop: diff --git a/drivers/net/tun.c b/drivers/net/tun.c index aab0be40d443..a8d3141582a5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -526,8 +526,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, e = tun_flow_find(head, rxhash); if (likely(e)) { /* TODO: keep queueing to old queue until it's empty? 
*/ - if (e->queue_index != queue_index) - e->queue_index = queue_index; + if (READ_ONCE(e->queue_index) != queue_index) + WRITE_ONCE(e->queue_index, queue_index); if (e->updated != jiffies) e->updated = jiffies; sock_rps_record_flow_hash(e->rps_rxhash); @@ -1104,7 +1104,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) */ skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ce78714f536f..74849da031fa 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -186,7 +186,7 @@ struct hso_tiocmget { int intr_completed; struct usb_endpoint_descriptor *endp; struct urb *urb; - struct hso_serial_state_notification serial_state_notification; + struct hso_serial_state_notification *serial_state_notification; u16 prev_UART_state_bitmap; struct uart_icount icount; }; @@ -1432,7 +1432,7 @@ static int tiocmget_submit_urb(struct hso_serial *serial, usb_rcvintpipe(usb, tiocmget->endp-> bEndpointAddress & 0x7F), - &tiocmget->serial_state_notification, + tiocmget->serial_state_notification, sizeof(struct hso_serial_state_notification), tiocmget_intr_callback, serial, tiocmget->endp->bInterval); @@ -1479,7 +1479,7 @@ static void tiocmget_intr_callback(struct urb *urb) /* wIndex should be the USB interface number of the port to which the * notification applies, which should always be the Modem port. */ - serial_state_notification = &tiocmget->serial_state_notification; + serial_state_notification = tiocmget->serial_state_notification; if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE || serial_state_notification->bNotification != B_NOTIFICATION || le16_to_cpu(serial_state_notification->wValue) != W_VALUE || @@ -2565,6 +2565,8 @@ static void hso_free_tiomget(struct hso_serial *serial) usb_free_urb(tiocmget->urb); tiocmget->urb = NULL; serial->tiocmget = NULL; + kfree(tiocmget->serial_state_notification); + tiocmget->serial_state_notification = NULL; kfree(tiocmget); } } @@ -2615,19 +2617,26 @@ static struct hso_device *hso_create_bulk_serial_device( num_urbs = 2; serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget), GFP_KERNEL); + serial->tiocmget->serial_state_notification + = kzalloc(sizeof(struct hso_serial_state_notification), + GFP_KERNEL); /* it isn't going to break our heart if serial->tiocmget * allocation fails don't bother checking this. */ - if (serial->tiocmget) { + if (serial->tiocmget && serial->tiocmget->serial_state_notification) { tiocmget = serial->tiocmget; + tiocmget->endp = hso_get_ep(interface, + USB_ENDPOINT_XFER_INT, + USB_DIR_IN); + if (!tiocmget->endp) { + dev_err(&interface->dev, "Failed to find INT IN ep\n"); + goto exit; + } + tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); if (tiocmget->urb) { mutex_init(&tiocmget->mutex); init_waitqueue_head(&tiocmget->waitq); - tiocmget->endp = hso_get_ep( - interface, - USB_ENDPOINT_XFER_INT, - USB_DIR_IN); } else hso_free_tiomget(serial); } diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 58f5a219fb65..62948098191f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3782,10 +3782,14 @@ static int lan78xx_probe(struct usb_interface *intf, /* driver requires remote-wakeup capability during autosuspend. 
*/ intf->needs_remote_wakeup = 1; + ret = lan78xx_phy_init(dev); + if (ret < 0) + goto out4; + ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out4; + goto out5; } usb_set_intfdata(intf, dev); @@ -3798,14 +3802,10 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); - ret = lan78xx_phy_init(dev); - if (ret < 0) - goto out5; - return 0; out5: - unregister_netdev(netdev); + phy_disconnect(netdev->phydev); out4: usb_free_urb(dev->urb_intr); out3: diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b6dc5d714b5e..596428ec71df 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1327,6 +1327,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ @@ -1350,6 +1351,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ + {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */ {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 08726090570e..cee9fef925cd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4799,10 +4799,9 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); - mutex_lock(&tp->control); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - mutex_unlock(&tp->control); + set_ethernet_addr(tp); return rtl8152_resume(intf); } diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index c5d4a0060124..681e0def6356 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -335,7 +335,7 @@ static void sr_set_multicast(struct net_device *net) static int sr_mdio_read(struct net_device *net, int phy_id, int loc) { struct usbnet *dev = netdev_priv(net); - __le16 res; + __le16 res = 0; mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba98e0971b84..5a635f028bdc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1585,7 +1585,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Don't wait up for transmitted skbs to be freed. 
*/ if (!use_napi) { skb_orphan(skb); - nf_reset(skb); + nf_reset_ct(skb); } /* If running out of space, stop queue to avoid getting packets that we diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index a4b38a980c3c..ee52bde058df 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -366,7 +366,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct neighbour *neigh; int ret; - nf_reset(skb); + nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -459,7 +459,7 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; @@ -560,7 +560,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s bool is_v6gw = false; int ret = -EINVAL; - nf_reset(skb); + nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -670,7 +670,7 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); + nf_reset_ct(skb); else skb = NULL; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index dc45d16e8d21..383d4fa555a8 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2118,12 +2118,15 @@ static int ath10k_init_uart(struct ath10k *ar) return ret; } - if (!uart_print && ar->hw_params.uart_pin_workaround) { - ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, - ar->hw_params.uart_pin); - if (ret) { - ath10k_warn(ar, "failed to set UART TX pin: %d", ret); - return ret; + if (!uart_print) { + if (ar->hw_params.uart_pin_workaround) { + ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, + ar->hw_params.uart_pin); + if (ret) { + ath10k_warn(ar, "failed to set UART TX pin: %d", + ret); + return ret; + } } return 0; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 7573af2d88ce..c2db758b9d54 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -162,12 +162,13 @@ int iwl_acpi_get_mcc(struct device *dev, char *mcc) wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -224,12 +225,13 @@ int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk) wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 5c8602de9168..87421807e040 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -646,6 +646,7 @@ static struct scatterlist *alloc_sgtable(int size) if (new_page) __free_page(new_page); } + kfree(table); return NULL; } alloc_size = min_t(int, size, PAGE_SIZE); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h 
b/drivers/net/wireless/intel/iwlwifi/iwl-io.h index f8e4f0f5de0c..f09e368c7040 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h @@ -112,38 +112,38 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf); */ static inline u32 iwl_umac_prph(struct iwl_trans *trans, u32 ofs) { - return ofs + trans->cfg->trans.umac_prph_offset; + return ofs + trans->trans_cfg->umac_prph_offset; } static inline u32 iwl_read_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs) { return iwl_read_prph_no_grab(trans, ofs + - trans->cfg->trans.umac_prph_offset); + trans->trans_cfg->umac_prph_offset); } static inline u32 iwl_read_umac_prph(struct iwl_trans *trans, u32 ofs) { - return iwl_read_prph(trans, ofs + trans->cfg->trans.umac_prph_offset); + return iwl_read_prph(trans, ofs + trans->trans_cfg->umac_prph_offset); } static inline void iwl_write_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs, u32 val) { - iwl_write_prph_no_grab(trans, ofs + trans->cfg->trans.umac_prph_offset, + iwl_write_prph_no_grab(trans, ofs + trans->trans_cfg->umac_prph_offset, val); } static inline void iwl_write_umac_prph(struct iwl_trans *trans, u32 ofs, u32 val) { - iwl_write_prph(trans, ofs + trans->cfg->trans.umac_prph_offset, val); + iwl_write_prph(trans, ofs + trans->trans_cfg->umac_prph_offset, val); } static inline int iwl_poll_umac_prph_bit(struct iwl_trans *trans, u32 addr, u32 bits, u32 mask, int timeout) { return iwl_poll_prph_bit(trans, addr + - trans->cfg->trans.umac_prph_offset, + trans->trans_cfg->umac_prph_offset, bits, mask, timeout); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 32a5e4e5461f..d9eb2b286438 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -420,6 +420,9 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) }; int ret; + if (mvm->trans->cfg->tx_with_siso_diversity) + init_cfg.init_flags |= cpu_to_le32(BIT(IWL_INIT_PHY)); + lockdep_assert_held(&mvm->mutex); mvm->rfkill_safe_init_done = false; @@ -694,12 +697,13 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } - if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -731,13 +735,14 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } if ((wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) || - (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER)) { + (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) || + tbl_rev != 0) { ret = -EINVAL; goto out_free; } @@ -791,11 +796,16 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev > 1) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } + if (tbl_rev != 0) { + ret = -EINVAL; + goto out_free; + } + mvm->geo_rev = tbl_rev; for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) { for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) { @@ -889,15 
+899,17 @@ static bool iwl_mvm_sar_geo_support(struct iwl_mvm *mvm) * firmware versions. Unfortunately, we don't have a TLV API * flag to rely on, so rely on the major version which is in * the first byte of ucode_ver. This was implemented - * initially on version 38 and then backported to29 and 17. - * The intention was to have it in 36 as well, but not all - * 8000 family got this feature enabled. The 8000 family is - * the only one using version 36, so skip this version - * entirely. + * initially on version 38 and then backported to 17. It was + * also backported to 29, but only for 7265D devices. The + * intention was to have it in 36 as well, but not all 8000 + * family got this feature enabled. The 8000 family is the + * only one using version 36, so skip this version entirely. */ return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 || - IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 || - IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17; + IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17 || + (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 && + ((mvm->trans->hw_rev & CSR_HW_REV_TYPE_MSK) == + CSR_HW_REV_TYPE_7265D)); } int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm) @@ -1020,11 +1032,16 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm) wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data, ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev); - if (IS_ERR(wifi_pkg) || tbl_rev != 0) { + if (IS_ERR(wifi_pkg)) { ret = PTR_ERR(wifi_pkg); goto out_free; } + if (tbl_rev != 0) { + ret = -EINVAL; + goto out_free; + } + enabled = &wifi_pkg->package.elements[1]; if (enabled->type != ACPI_TYPE_INTEGER || (enabled->integer.value != 0 && enabled->integer.value != 1)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index cd1b10042fbf..d31f96c3f925 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4881,11 +4881,11 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, if (!iwl_mvm_has_new_rx_api(mvm)) return; - notif->cookie = mvm->queue_sync_cookie; - - if (notif->sync) + if (notif->sync) { + notif->cookie = mvm->queue_sync_cookie; atomic_set(&mvm->queue_sync_counter, mvm->trans->num_rx_queues); + } ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif, size, !notif->sync); @@ -4905,7 +4905,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm, out: atomic_set(&mvm->queue_sync_counter, 0); - mvm->queue_sync_cookie++; + if (notif->sync) + mvm->queue_sync_cookie++; } static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 75fa8a6aafee..74980382e64c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -107,13 +107,9 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, /* allocate ucode sections in dram and set addresses */ ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram); - if (ret) { - dma_free_coherent(trans->dev, - sizeof(*prph_scratch), - prph_scratch, - trans_pcie->prph_scratch_dma_addr); - return ret; - } + if (ret) + goto err_free_prph_scratch; + /* Allocate prph information * currently we don't assign to the prph info anything, but it would get @@ -121,16 +117,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info), &trans_pcie->prph_info_dma_addr, GFP_KERNEL); - if 
(!prph_info) - return -ENOMEM; + if (!prph_info) { + ret = -ENOMEM; + goto err_free_prph_scratch; + } /* Allocate context info */ ctxt_info_gen3 = dma_alloc_coherent(trans->dev, sizeof(*ctxt_info_gen3), &trans_pcie->ctxt_info_dma_addr, GFP_KERNEL); - if (!ctxt_info_gen3) - return -ENOMEM; + if (!ctxt_info_gen3) { + ret = -ENOMEM; + goto err_free_prph_info; + } ctxt_info_gen3->prph_info_base_addr = cpu_to_le64(trans_pcie->prph_info_dma_addr); @@ -186,6 +186,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, iwl_set_bit(trans, CSR_GP_CNTRL, CSR_AUTO_FUNC_INIT); return 0; + +err_free_prph_info: + dma_free_coherent(trans->dev, + sizeof(*prph_info), + prph_info, + trans_pcie->prph_info_dma_addr); + +err_free_prph_scratch: + dma_free_coherent(trans->dev, + sizeof(*prph_scratch), + prph_scratch, + trans_pcie->prph_scratch_dma_addr); + return ret; + } void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index e29c47744ef5..6f4bb7ce71a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -513,31 +513,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)}, /* 9000 Series */ - {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_i_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, - {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)}, + {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 
0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)}, {IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)}, @@ -643,34 +645,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x30DC, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)}, + + {IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 
0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)}, {IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)}, @@ -726,62 +728,60 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, {IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)}, - 
{IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + + {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + 
{IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)}, @@ -821,34 +821,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 
0x4234, iwl9560_2ac_cfg_soc)}, - {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)}, + + {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)}, {IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)}, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index f8a1f985a1d8..6961f00ff812 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3272,11 +3272,17 @@ static struct iwl_trans_dump_data ptr = cmdq->write_ptr; for (i = 0; i < cmdq->n_window; i++) { u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr); + u8 tfdidx; u32 caplen, cmdlen; + if (trans->trans_cfg->use_tfh) + tfdidx = idx; + else + tfdidx = ptr; + cmdlen = iwl_trans_pcie_get_cmdlen(trans, - cmdq->tfds + - tfd_size * ptr); + (u8 *)cmdq->tfds + + tfd_size * tfdidx); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -3450,6 +3456,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3584,10 +3599,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, 
iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_DEBUGFS trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED; mutex_init(&trans_pcie->fw_mon_data.mutex); @@ -3599,6 +3610,8 @@ out_free_ict: iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 635956024e88..14f562cd715c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1261,8 +1261,8 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw, skb_orphan(skb); skb_dst_drop(skb); skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); /* * Get absolute mactime here so all HWs RX at the "same time", and @@ -4026,7 +4026,7 @@ static int __init init_mac80211_hwsim(void) err = dev_alloc_name(hwsim_mon, hwsim_mon->name); if (err < 0) { rtnl_unlock(); - goto out_free_radios; + goto out_free_mon; } err = register_netdevice(hwsim_mon); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 2b216edd0c7d..a90a518b40d3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -23,7 +23,6 @@ #include <linux/leds.h> #include <linux/mutex.h> #include <linux/etherdevice.h> -#include <linux/input-polldev.h> #include <linux/kfifo.h> #include <linux/hrtimer.h> #include <linux/average.h> diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c index 4d4e3888ef20..f2395309ec00 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c @@ -555,7 +555,7 @@ static ssize_t rt2x00debug_write_restart_hw(struct file *file, { struct rt2x00debug_intf *intf = file->private_data; struct rt2x00_dev *rt2x00dev = intf->rt2x00dev; - static unsigned long last_reset; + static unsigned long last_reset = INITIAL_JIFFIES; if (!rt2x00_has_cap_restart_hw(rt2x00dev)) return -EOPNOTSUPP; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 240f762b3749..103ed00775eb 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -719,7 +719,6 @@ err_unmap: xenvif_unmap_frontend_data_rings(queue); netif_napi_del(&queue->napi); err: - module_put(THIS_MODULE); return err; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e14ec75b61d6..482c6c8b0fb7 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -887,9 +887,9 @@ static int xennet_set_skb_gso(struct sk_buff *skb, return 0; } -static RING_IDX xennet_fill_frags(struct netfront_queue *queue, - struct sk_buff *skb, - struct sk_buff_head *list) +static int xennet_fill_frags(struct netfront_queue *queue, + struct sk_buff *skb, + struct sk_buff_head *list) { RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *nskb; @@ -908,7 +908,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { queue->rx.rsp_cons = ++cons + skb_queue_len(list); kfree_skb(nskb); - return ~0U; + return -ENOENT; } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, @@ -919,7 +919,9 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - return cons; + queue->rx.rsp_cons = cons; + + return 0; } static 
int checksum_setup(struct net_device *dev, struct sk_buff *skb) @@ -1045,8 +1047,7 @@ err: skb->data_len = rx->status; skb->len += rx->status; - i = xennet_fill_frags(queue, skb, &tmpq); - if (unlikely(i == ~0U)) + if (unlikely(xennet_fill_frags(queue, skb, &tmpq))) goto err; if (rx->flags & XEN_NETRXF_csum_blank) @@ -1056,7 +1057,7 @@ err: __skb_queue_tail(&rxq, skb); - queue->rx.rsp_cons = ++i; + i = ++queue->rx.rsp_cons; work_done++; } diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index c5289eaf17ee..e897e4d768ef 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -547,18 +547,25 @@ static int pn533_usb_probe(struct usb_interface *interface, rc = pn533_finalize_setup(priv); if (rc) - goto error; + goto err_deregister; usb_set_intfdata(interface, phy); return 0; +err_deregister: + pn533_unregister_device(phy->priv); error: + usb_kill_urb(phy->in_urb); + usb_kill_urb(phy->out_urb); + usb_kill_urb(phy->ack_urb); + usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); + kfree(phy->ack_buffer); return rc; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 108f60b46804..fa7ba09dca77 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) */ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags)) return; - revalidate_disk(ns->disk); blk_set_queue_dying(ns->queue); /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); + /* + * Revalidate after unblocking dispatchers that may be holding bd_mutex + */ + revalidate_disk(ns->disk); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -113,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* * Only queue new scan work when admin and IO queues are both alive */ - if (ctrl->state == NVME_CTRL_LIVE) + if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset) queue_work(nvme_wq, &ctrl->scan_work); } +/* + * Use this function to proceed with scheduling reset_work for a controller + * that had previously been set to the resetting state. This is intended for + * code paths that can't be interrupted by other reset attempts. A hot removal + * may prevent this from succeeding. 
+ */ +int nvme_try_sched_reset(struct nvme_ctrl *ctrl) +{ + if (ctrl->state != NVME_CTRL_RESETTING) + return -EBUSY; + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) + return -EBUSY; + return 0; +} +EXPORT_SYMBOL_GPL(nvme_try_sched_reset); + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -134,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE && - ctrl->state != NVME_CTRL_ADMIN_ONLY) + if (ctrl->state != NVME_CTRL_LIVE) ret = -ENETRESET; } @@ -312,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, old_state = ctrl->state; switch (new_state) { - case NVME_CTRL_ADMIN_ONLY: - switch (old_state) { - case NVME_CTRL_CONNECTING: - changed = true; - /* FALLTHRU */ - default: - break; - } - break; case NVME_CTRL_LIVE: switch (old_state) { case NVME_CTRL_NEW: @@ -336,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: changed = true; /* FALLTHRU */ default: @@ -356,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: switch (old_state) { case NVME_CTRL_LIVE: - case NVME_CTRL_ADMIN_ONLY: case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; @@ -378,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, break; } - if (changed) + if (changed) { ctrl->state = new_state; + wake_up_all(&ctrl->state_wq); + } spin_unlock_irqrestore(&ctrl->lock, flags); if (changed && ctrl->state == NVME_CTRL_LIVE) @@ -388,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); +/* + * Returns true for sink states that can't ever transition back to live. + */ +static bool nvme_state_terminal(struct nvme_ctrl *ctrl) +{ + switch (ctrl->state) { + case NVME_CTRL_NEW: + case NVME_CTRL_LIVE: + case NVME_CTRL_RESETTING: + case NVME_CTRL_CONNECTING: + return false; + case NVME_CTRL_DELETING: + case NVME_CTRL_DEAD: + return true; + default: + WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state); + return true; + } +} + +/* + * Waits for the controller state to be resetting, or returns false if it is + * not possible to ever transition to that state. 
+ */ +bool nvme_wait_reset(struct nvme_ctrl *ctrl) +{ + wait_event(ctrl->state_wq, + nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) || + nvme_state_terminal(ctrl)); + return ctrl->state == NVME_CTRL_RESETTING; +} +EXPORT_SYMBOL_GPL(nvme_wait_reset); + static void nvme_free_ns_head(struct kref *ref) { struct nvme_ns_head *head = @@ -847,7 +889,7 @@ out: static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, - u32 meta_seed, u32 *result, unsigned timeout) + u32 meta_seed, u64 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -888,7 +930,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, else ret = nvme_req(req)->status; if (result) - *result = le32_to_cpu(nvme_req(req)->result.u32); + *result = le64_to_cpu(nvme_req(req)->result.u64); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -1303,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl) if (ns->disk && nvme_revalidate_disk(ns->disk)) nvme_set_queue_dying(ns); up_read(&ctrl->namespaces_rwsem); - - nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); } static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) @@ -1320,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) nvme_unfreeze(ctrl); nvme_mpath_unfreeze(ctrl->subsys); mutex_unlock(&ctrl->subsys->lock); + nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) @@ -1335,6 +1376,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_command c; unsigned timeout = 0; u32 effects; + u64 result; + int status; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (copy_from_user(&cmd, ucmd, sizeof(cmd))) + return -EFAULT; + if (cmd.flags) + return -EINVAL; + + memset(&c, 0, sizeof(c)); + c.common.opcode = cmd.opcode; + c.common.flags = cmd.flags; + c.common.nsid = cpu_to_le32(cmd.nsid); + c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); + c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); + c.common.cdw10 = cpu_to_le32(cmd.cdw10); + c.common.cdw11 = cpu_to_le32(cmd.cdw11); + c.common.cdw12 = cpu_to_le32(cmd.cdw12); + c.common.cdw13 = cpu_to_le32(cmd.cdw13); + c.common.cdw14 = cpu_to_le32(cmd.cdw14); + c.common.cdw15 = cpu_to_le32(cmd.cdw15); + + if (cmd.timeout_ms) + timeout = msecs_to_jiffies(cmd.timeout_ms); + + effects = nvme_passthru_start(ctrl, ns, cmd.opcode); + status = nvme_submit_user_cmd(ns ? 
ns->queue : ctrl->admin_q, &c, + (void __user *)(uintptr_t)cmd.addr, cmd.data_len, + (void __user *)(uintptr_t)cmd.metadata, + cmd.metadata_len, 0, &result, timeout); + nvme_passthru_end(ctrl, effects); + + if (status >= 0) { + if (put_user(result, &ucmd->result)) + return -EFAULT; + } + + return status; +} + +static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + struct nvme_passthru_cmd64 __user *ucmd) +{ + struct nvme_passthru_cmd64 cmd; + struct nvme_command c; + unsigned timeout = 0; + u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1405,6 +1494,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx) srcu_read_unlock(&head->srcu, idx); } +static bool is_ctrl_ioctl(unsigned int cmd) +{ + if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD) + return true; + if (is_sed_ioctl(cmd)) + return true; + return false; +} + +static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, + struct nvme_ns_head *head, + int srcu_idx) +{ + struct nvme_ctrl *ctrl = ns->ctrl; + int ret; + + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + + switch (cmd) { + case NVME_IOCTL_ADMIN_CMD: + ret = nvme_user_cmd(ctrl, NULL, argp); + break; + case NVME_IOCTL_ADMIN64_CMD: + ret = nvme_user_cmd64(ctrl, NULL, argp); + break; + default: + ret = sed_ioctl(ctrl->opal_dev, cmd, argp); + break; + } + nvme_put_ctrl(ctrl); + return ret; +} + static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1422,20 +1546,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, * seperately and drop the ns SRCU reference early. This avoids a * deadlock when deleting namespaces using the passthrough interface. */ - if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) { - struct nvme_ctrl *ctrl = ns->ctrl; - - nvme_get_ctrl(ns->ctrl); - nvme_put_ns_from_disk(head, srcu_idx); - - if (cmd == NVME_IOCTL_ADMIN_CMD) - ret = nvme_user_cmd(ctrl, NULL, argp); - else - ret = sed_ioctl(ctrl->opal_dev, cmd, argp); - - nvme_put_ctrl(ctrl); - return ret; - } + if (is_ctrl_ioctl(cmd)) + return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); switch (cmd) { case NVME_IOCTL_ID: @@ -1448,6 +1560,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, case NVME_IOCTL_SUBMIT_IO: ret = nvme_submit_io(ns, argp); break; + case NVME_IOCTL_IO64_CMD: + ret = nvme_user_cmd64(ns->ctrl, ns, argp); + break; default: if (ns->ndev) ret = nvme_nvm_ioctl(ns, cmd, arg); @@ -2289,6 +2404,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = { .vid = 0x14a4, .fr = "22301111", .quirks = NVME_QUIRK_SIMPLE_SUSPEND, + }, + { + /* + * This Kingston E8FK11.T firmware version has no interrupt + * after resume with actions related to suspend to idle + * https://bugzilla.kernel.org/show_bug.cgi?id=204887 + */ + .vid = 0x2646, + .fr = "E8FK11.T", + .quirks = NVME_QUIRK_SIMPLE_SUSPEND, } }; @@ -2540,8 +2665,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, - dev_name(ctrl->device))) { + ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, + dev_name(ctrl->device)); + if (ret) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); goto out_put_subsystem; @@ -2786,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file) switch (ctrl->state) { case NVME_CTRL_LIVE: - case 
NVME_CTRL_ADMIN_ONLY: break; default: return -EWOULDBLOCK; } @@ -2838,6 +2963,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case NVME_IOCTL_ADMIN_CMD: return nvme_user_cmd(ctrl, NULL, argp); + case NVME_IOCTL_ADMIN64_CMD: + return nvme_user_cmd64(ctrl, NULL, argp); case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: @@ -3045,6 +3172,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); nvme_show_int_function(cntlid); nvme_show_int_function(numa_node); +nvme_show_int_function(queue_count); +nvme_show_int_function(sqsize); static ssize_t nvme_sysfs_delete(struct device *dev, struct device_attribute *attr, const char *buf, @@ -3076,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", - [NVME_CTRL_ADMIN_ONLY] = "only-admin", [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_CONNECTING] = "connecting", [NVME_CTRL_DELETING] = "deleting", @@ -3125,6 +3253,8 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_address.attr, &dev_attr_state.attr, &dev_attr_numa_node.attr, + &dev_attr_queue_count.attr, + &dev_attr_sqsize.attr, NULL }; @@ -3585,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work) struct nvme_id_ctrl *id; unsigned nn; - if (ctrl->state != NVME_CTRL_LIVE) + /* No tagset on a live ctrl means IO queues could not be created */ + if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) return; - WARN_ON_ONCE(!ctrl->tagset); - if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { dev_info(ctrl->device, "rescanning namespaces.\n"); nvme_clear_changed_ns_log(ctrl); @@ -3750,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work) if (time_after(jiffies, fw_act_timeout)) { dev_warn(ctrl->device, "Fw activation timeout, reset controller\n"); - nvme_reset_ctrl(ctrl); - break; + nvme_try_sched_reset(ctrl); + return; } msleep(100); } - if (ctrl->state != NVME_CTRL_LIVE) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_start_queues(ctrl); @@ -3776,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: - queue_work(nvme_wq, &ctrl->fw_act_work); + /* + * We are (ab)using the RESETTING state to prevent subsequent + * recovery actions from interfering with the controller's + * firmware activation. 
+ */ + if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) + queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: @@ -3899,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); + init_waitqueue_head(&ctrl->state_wq); INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 93f08d77c896..a0ec40ab62ee 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) { - if (likely(ctrl->state == NVME_CTRL_LIVE || - ctrl->state == NVME_CTRL_ADMIN_ONLY)) + if (likely(ctrl->state == NVME_CTRL_LIVE)) return true; return __nvmf_check_ready(ctrl, rq, queue_live); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b5013c101b35..22e8401352c2 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -15,6 +15,7 @@ #include <linux/sed-opal.h> #include <linux/fault-inject.h> #include <linux/rcupdate.h> +#include <linux/wait.h> #include <trace/events/block.h> @@ -161,7 +162,6 @@ static inline u16 nvme_req_qid(struct request *req) enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, - NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */ NVME_CTRL_RESETTING, NVME_CTRL_CONNECTING, NVME_CTRL_DELETING, @@ -199,6 +199,7 @@ struct nvme_ctrl { struct cdev cdev; struct work_struct reset_work; struct work_struct delete_work; + wait_queue_head_t state_wq; struct nvme_subsystem *subsys; struct list_head subsys_entry; @@ -221,6 +222,7 @@ struct nvme_ctrl { u16 oacs; u16 nssa; u16 nr_streams; + u16 sqsize; u32 max_namespaces; atomic_t abort_limit; u8 vwc; @@ -269,7 +271,6 @@ struct nvme_ctrl { u16 hmmaxd; /* Fabrics only */ - u16 sqsize; u32 ioccsz; u32 iorcsz; u16 icdoff; @@ -449,6 +450,7 @@ void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); +bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -499,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c0808f9eb8ab..869f462e6b6e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, struct bio_vec *bv) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; + unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1); + unsigned int first_prp_len = dev->ctrl.page_size - offset; iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); if 
(dma_mapping_error(dev->dev, iod->first_dma)) @@ -2263,10 +2264,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) return true; } -/* - * return error value only when tagset allocation failed - */ -static int nvme_dev_add(struct nvme_dev *dev) +static void nvme_dev_add(struct nvme_dev *dev) { int ret; @@ -2296,7 +2294,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (ret) { dev_warn(dev->ctrl.device, "IO queues tagset allocation failed %d\n", ret); - return ret; + return; } dev->ctrl.tagset = &dev->tagset; } else { @@ -2307,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev) } nvme_dbbuf_set(dev); - return 0; } static int nvme_pci_enable(struct nvme_dev *dev) @@ -2467,6 +2464,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) mutex_unlock(&dev->shutdown_lock); } +static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown) +{ + if (!nvme_wait_reset(&dev->ctrl)) + return -EBUSY; + nvme_dev_disable(dev, shutdown); + return 0; +} + static int nvme_setup_prp_pools(struct nvme_dev *dev) { dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, @@ -2490,14 +2495,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) dma_pool_destroy(dev->prp_small_pool); } +static void nvme_free_tagset(struct nvme_dev *dev) +{ + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + dev->ctrl.tagset = NULL; +} + static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); nvme_dbbuf_dma_free(dev); put_device(dev->dev); - if (dev->tagset.tags) - blk_mq_free_tag_set(&dev->tagset); + nvme_free_tagset(dev); if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); @@ -2508,6 +2519,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_remove_dead_ctrl(struct nvme_dev *dev) { + /* + * Set state to deleting now to avoid blocking nvme_wait_reset(), which + * may be holding this pci_dev's device lock. + */ + nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); nvme_kill_queues(&dev->ctrl); @@ -2521,7 +2537,6 @@ static void nvme_reset_work(struct work_struct *work) container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { result = -ENODEV; @@ -2615,13 +2630,11 @@ static void nvme_reset_work(struct work_struct *work) dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); - new_state = NVME_CTRL_ADMIN_ONLY; + nvme_free_tagset(dev); } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - /* hit this only when allocate tagset fails */ - if (nvme_dev_add(dev)) - new_state = NVME_CTRL_ADMIN_ONLY; + nvme_dev_add(dev); nvme_unfreeze(&dev->ctrl); } @@ -2629,9 +2642,9 @@ static void nvme_reset_work(struct work_struct *work) * If only admin queue live, keep it to do further investigation or * recovery. 
*/ - if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { dev_warn(dev->ctrl.device, - "failed to mark controller state %d\n", new_state); + "failed to mark controller live state\n"); result = -ENODEV; goto out; } @@ -2672,7 +2685,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); + *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off); return 0; } @@ -2836,19 +2849,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_reset_prepare(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, false); + + /* + * We don't need to check the return value from waiting for the reset + * state as pci_dev device lock is held, making it impossible to race + * with ->remove(). + */ + nvme_disable_prepare_reset(dev, false); + nvme_sync_queues(&dev->ctrl); } static void nvme_reset_done(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_reset_ctrl_sync(&dev->ctrl); + + if (!nvme_try_sched_reset(&dev->ctrl)) + flush_work(&dev->ctrl.reset_work); } static void nvme_shutdown(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_dev_disable(dev, true); + nvme_disable_prepare_reset(dev, true); } /* @@ -2901,7 +2923,7 @@ static int nvme_resume(struct device *dev) if (ndev->last_ps == U32_MAX || nvme_set_power_state(ctrl, ndev->last_ps) != 0) - nvme_reset_ctrl(ctrl); + return nvme_try_sched_reset(&ndev->ctrl); return 0; } @@ -2929,43 +2951,42 @@ static int nvme_suspend(struct device *dev) */ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || - (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) { - nvme_dev_disable(ndev, true); - return 0; - } + (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) + return nvme_disable_prepare_reset(ndev, true); nvme_start_freeze(ctrl); nvme_wait_freeze(ctrl); nvme_sync_queues(ctrl); - if (ctrl->state != NVME_CTRL_LIVE && - ctrl->state != NVME_CTRL_ADMIN_ONLY) + if (ctrl->state != NVME_CTRL_LIVE) goto unfreeze; ret = nvme_get_power_state(ctrl, &ndev->last_ps); if (ret < 0) goto unfreeze; + /* + * A saved state prevents pci pm from generically controlling the + * device's power. If we're using protocol specific settings, we don't + * want pci interfering. + */ + pci_save_state(pdev); + ret = nvme_set_power_state(ctrl, ctrl->npss); if (ret < 0) goto unfreeze; if (ret) { + /* discard the saved state */ + pci_load_saved_state(pdev, NULL); + /* * Clearing npss forces a controller reset on resume. The * correct value will be rediscovered then. */ - nvme_dev_disable(ndev, true); + ret = nvme_disable_prepare_reset(ndev, true); ctrl->npss = 0; - ret = 0; - goto unfreeze; } - /* - * A saved state prevents pci pm from generically controlling the - * device's power. If we're using protocol specific settings, we don't - * want pci interfering. 
- */ - pci_save_state(pdev); unfreeze: nvme_unfreeze(ctrl); return ret; @@ -2974,9 +2995,7 @@ unfreeze: static int nvme_simple_suspend(struct device *dev) { struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); - - nvme_dev_disable(ndev, true); - return 0; + return nvme_disable_prepare_reset(ndev, true); } static int nvme_simple_resume(struct device *dev) @@ -2984,8 +3003,7 @@ static int nvme_simple_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - nvme_reset_ctrl(&ndev->ctrl); - return 0; + return nvme_try_sched_reset(&ndev->ctrl); } static const struct dev_pm_ops nvme_dev_pm_ops = { @@ -3090,6 +3108,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index dfa07bb9dfeb..f19a28b4e997 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) { return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len); + ibdev->attrs.max_fast_reg_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); enum ib_poll_context poll_ctx; - int ret; + int ret, pages_per_mr; queue->device = nvme_rdma_find_get_device(queue->cm_id); if (!queue->device) { @@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_qp; } + /* + * Currently we don't use SG_GAPS MR's so if the first entry is + * misaligned we'll end up using two entries for a single data page, + * so one additional entry is required. 
+ */ + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, - nvme_rdma_get_max_fr_pages(ibdev), 0); + pages_per_mr, 0); if (ret) { dev_err(queue->ctrl->ctrl.device, "failed to initialize MR pool sized %d for QID %d\n", @@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) if (!ret) { set_bit(NVME_RDMA_Q_LIVE, &queue->flags); } else { - __nvme_rdma_stop_queue(queue); + if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + __nvme_rdma_stop_queue(queue); dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); } @@ -820,8 +827,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + ctrl->ctrl.max_segments = ctrl->max_fr_pages; + ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -1694,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved) dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", rq->tag, nvme_rdma_queue_idx(queue)); + /* + * Restart the timer if a controller reset is already scheduled. Any + * timed out commands would be handled before entering the connecting + * state. + */ + if (ctrl->ctrl.state == NVME_CTRL_RESETTING) + return BLK_EH_RESET_TIMER; + if (ctrl->ctrl.state != NVME_CTRL_LIVE) { /* * Teardown immediately if controller times out while starting diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ffd5957637a..770dbcbc999e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w) { struct nvme_tcp_queue *queue = container_of(w, struct nvme_tcp_queue, io_work); - unsigned long start = jiffies + msecs_to_jiffies(1); + unsigned long deadline = jiffies + msecs_to_jiffies(1); do { bool pending = false; @@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w) if (!pending) return; - } while (time_after(jiffies, start)); /* quota is exhausted */ + } while (!time_after(jiffies, deadline)); /* quota is exhausted */ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } @@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; queue->sock->sk->sk_state_change = nvme_tcp_state_change; queue->sock->sk->sk_write_space = nvme_tcp_write_space; +#ifdef CONFIG_NET_RX_BUSY_POLL queue->sock->sk->sk_ll_usec = 1; +#endif write_unlock_bh(&queue->sock->sk->sk_callback_lock); return 0; @@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved) struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; + /* + * Restart the timer if a controller reset is already scheduled. Any + * timed out commands would be handled before entering the connecting + * state. 
+ */ + if (ctrl->ctrl.state == NVME_CTRL_RESETTING) + return BLK_EH_RESET_TIMER; + dev_warn(ctrl->ctrl.device, "queue %d: timeout request %#x type %d\n", nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); @@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ret = nvme_tcp_map_data(queue, rq); if (unlikely(ret)) { + nvme_cleanup_cmd(rq); dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", ret); return ret; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index de0bff70ebb6..32008d85172b 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -11,10 +11,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of physical blocks per logical block. */ - const u32 ppl = ql->physical_block_size / ql->logical_block_size; - /* Physical blocks per logical block, 0's based. */ - const __le16 ppl0b = to0based(ppl); + /* Number of logical blocks per physical block. */ + const u32 lpp = ql->physical_block_size / ql->logical_block_size; + /* Logical blocks per physical block, 0's based. */ + const __le16 lpp0b = to0based(lpp); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) * field from the identify controller data structure should be used. */ id->nsfeat |= 1 << 1; - id->nawun = ppl0b; - id->nawupf = ppl0b; - id->nacwu = ppl0b; + id->nawun = lpp0b; + id->nawupf = lpp0b; + id->nacwu = lpp0b; /* * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and @@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) */ id->nsfeat |= 1 << 4; /* NPWG = Namespace Preferred Write Granularity. 0's based */ - id->npwg = ppl0b; + id->npwg = lpp0b; /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 
0's based */ diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 748a39fca771..11f5aea97d1b 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->sg_table.sgl = iod->first_sgl; if (sg_alloc_table_chained(&iod->sg_table, blk_rq_nr_phys_segments(req), - iod->sg_table.sgl, SG_CHUNK_SIZE)) + iod->sg_table.sgl, SG_CHUNK_SIZE)) { + nvme_cleanup_cmd(req); return BLK_STS_RESOURCE; + } iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index bf4f03474e89..d535080b781f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return 0; err: - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); return NVME_SC_INTERNAL; } @@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) if (queue->nvme_sq.sqhd_disabled) { kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } return 1; @@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, return -EAGAIN; kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; @@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) nvmet_req_uninit(&cmd->req); nvmet_tcp_unmap_pdu_iovec(cmd); kfree(cmd->iov); - if (cmd->req.sg_cnt) - sgl_free(cmd->req.sg); + sgl_free(cmd->req.sg); } static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index ed50502cc65a..de8e4e347249 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -678,14 +678,6 @@ static int sba_dma_supported( struct device *dev, u64 mask) return(0); } - /* Documentation/DMA-API-HOWTO.txt tells drivers to try 64-bit - * first, then fall back to 32-bit if that fails. - * We are just "encouraging" 32-bit DMA masks here since we can - * never allow IOMMU bypass unless we add special support for ZX1. - */ - if (mask > ~0U) - return 0; - ioc = GET_IOC(dev); if (!ioc) return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e7982af9a5d8..a97e2571a527 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -959,19 +959,6 @@ void pci_refresh_power_state(struct pci_dev *dev) } /** - * pci_power_up - Put the given device into D0 forcibly - * @dev: PCI device to power up - */ -void pci_power_up(struct pci_dev *dev) -{ - if (platform_pci_power_manageable(dev)) - platform_pci_set_power_state(dev, PCI_D0); - - pci_raw_set_power_state(dev, PCI_D0); - pci_update_current_state(dev, PCI_D0); -} - -/** * pci_platform_power_transition - Use platform to change device power state * @dev: PCI device to handle. * @state: State to put the device into. 
@@ -1154,6 +1141,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) EXPORT_SYMBOL(pci_set_power_state); /** + * pci_power_up - Put the given device into D0 forcibly + * @dev: PCI device to power up + */ +void pci_power_up(struct pci_dev *dev) +{ + __pci_start_power_transition(dev, PCI_D0); + pci_raw_set_power_state(dev, PCI_D0); + pci_update_current_state(dev, PCI_D0); +} + +/** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended * @state: target sleep state for the whole system. This is the value diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c index 86cc2cc68fb5..af063f690846 100644 --- a/drivers/platform/x86/classmate-laptop.c +++ b/drivers/platform/x86/classmate-laptop.c @@ -420,12 +420,6 @@ failed_sensitivity: static int cmpc_accel_remove_v4(struct acpi_device *acpi) { - struct input_dev *inputdev; - struct cmpc_accel *accel; - - inputdev = dev_get_drvdata(&acpi->dev); - accel = dev_get_drvdata(&inputdev->dev); - device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4); device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4); return cmpc_remove_acpi_notify_device(acpi); @@ -656,12 +650,6 @@ failed_file: static int cmpc_accel_remove(struct acpi_device *acpi) { - struct input_dev *inputdev; - struct cmpc_accel *accel; - - inputdev = dev_get_drvdata(&acpi->dev); - accel = dev_get_drvdata(&inputdev->dev); - device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr); return cmpc_remove_acpi_notify_device(acpi); } diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c index ea68f6ed66ae..ffb8d5d1eb5f 100644 --- a/drivers/platform/x86/i2c-multi-instantiate.c +++ b/drivers/platform/x86/i2c-multi-instantiate.c @@ -108,6 +108,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev) if (ret < 0) { dev_dbg(dev, "Error requesting irq at index %d: %d\n", inst_data[i].irq_idx, ret); + goto error; } board_info.irq = ret; break; diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c index ab7ae1950867..fa97834fdb78 100644 --- a/drivers/platform/x86/intel_punit_ipc.c +++ b/drivers/platform/x86/intel_punit_ipc.c @@ -293,9 +293,8 @@ static int intel_punit_ipc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, punit_ipcdev); - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq < 0) { - punit_ipcdev->irq = 0; dev_warn(&pdev->dev, "Invalid IRQ, using polling mode\n"); } else { ret = devm_request_irq(&pdev->dev, irq, intel_punit_ioc, diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 960961fb0d7c..0517272a268e 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -97,8 +97,8 @@ config PTP_1588_CLOCK_PCH help This driver adds support for using the PCH EG20T as a PTP clock. The hardware supports time stamping of PTP packets - when using the end-to-end delay (E2E) mechansim. The peer - delay mechansim (P2P) is not supported. + when using the end-to-end delay (E2E) mechanism. The peer + delay mechanism (P2P) is not supported. 
This clock is only useful if your PTP programs are getting hardware time stamps on the PTP Ethernet packets using the diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index c61f00b72e15..a577218d1ab7 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -507,6 +507,8 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET; } + spin_lock_init(&ptp_qoriq->lock); + ktime_get_real_ts64(&now); ptp_qoriq_settime(&ptp_qoriq->caps, &now); @@ -514,7 +516,6 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base, (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT | (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT; - spin_lock_init(&ptp_qoriq->lock); spin_lock_irqsave(&ptp_qoriq->lock, flags); regs = &ptp_qoriq->regs; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fc53e1e221f0..c94184d080f8 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1553,8 +1553,8 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device) if (rc == 0) { memcpy(&private->vsq, vsq, sizeof(*vsq)); } else { - dev_warn(&device->cdev->dev, - "Reading the volume storage information failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the volume storage information failed with rc=%d", rc); } if (useglobal) @@ -1737,8 +1737,8 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device) if (rc == 0) { dasd_eckd_cpy_ext_pool_data(device, lcq); } else { - dev_warn(&device->cdev->dev, - "Reading the logical configuration failed with rc=%d\n", rc); + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading the logical configuration failed with rc=%d", rc); } dasd_sfree_request(cqr, cqr->memdev); @@ -2020,14 +2020,10 @@ dasd_eckd_check_characteristics(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err3; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -2059,9 +2055,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (readonly) set_bit(DASD_FLAG_DEVICE_RO, &device->flags); - if (dasd_eckd_is_ese(device)) - dasd_set_feature(device->cdev, DASD_FEATURE_DISCARD, 1); - dev_info(&device->cdev->dev, "New DASD %04X/%02X (CU %04X/%02X) " "with %d cylinders, %d heads, %d sectors%s\n", private->rdc_data.dev_type, @@ -3695,14 +3688,6 @@ static int dasd_eckd_release_space(struct dasd_device *device, return -EINVAL; } -static struct dasd_ccw_req * -dasd_eckd_build_cp_discard(struct dasd_device *device, struct dasd_block *block, - struct request *req, sector_t first_trk, - sector_t last_trk) -{ - return dasd_eckd_dso_ras(device, block, req, first_trk, last_trk, 1); -} - static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_device *startdev, struct dasd_block *block, @@ -4447,10 +4432,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, cmdwtd = private->features.feature[12] & 0x40; use_prefix = private->features.feature[8] & 0x01; - if (req_op(req) == REQ_OP_DISCARD) - return dasd_eckd_build_cp_discard(startdev, block, req, - first_trk, last_trk); - cqr = NULL; if (cdlspecial || dasd_page_cache) { /* do nothing, just fall through to the 
cmd mode single case */ @@ -4729,14 +4710,12 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base, struct dasd_block *block, struct request *req) { - struct dasd_device *startdev = NULL; struct dasd_eckd_private *private; - struct dasd_ccw_req *cqr; + struct dasd_device *startdev; unsigned long flags; + struct dasd_ccw_req *cqr; - /* Discard requests can only be processed on base devices */ - if (req_op(req) != REQ_OP_DISCARD) - startdev = dasd_alias_get_start_dev(base); + startdev = dasd_alias_get_start_dev(base); if (!startdev) startdev = base; private = startdev->private; @@ -5663,14 +5642,10 @@ static int dasd_eckd_restore_device(struct dasd_device *device) dasd_eckd_read_features(device); /* Read Volume Information */ - rc = dasd_eckd_read_vol_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_vol_info(device); /* Read Extent Pool Information */ - rc = dasd_eckd_read_ext_pool_info(device); - if (rc) - goto out_err2; + dasd_eckd_read_ext_pool_info(device); /* Read Device Characteristics */ rc = dasd_generic_read_dev_chars(device, DASD_ECKD_MAGIC, @@ -6521,20 +6496,8 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) unsigned int logical_block_size = block->bp_block; struct request_queue *q = block->request_queue; struct dasd_device *device = block->base; - struct dasd_eckd_private *private; - unsigned int max_discard_sectors; - unsigned int max_bytes; - unsigned int ext_bytes; /* Extent Size in Bytes */ - int recs_per_trk; - int trks_per_cyl; - int ext_limit; - int ext_size; /* Extent Size in Cylinders */ int max; - private = device->private; - trks_per_cyl = private->rdc_data.trk_per_cyl; - recs_per_trk = recs_per_track(&private->rdc_data, 0, logical_block_size); - if (device->features & DASD_FEATURE_USERAW) { /* * the max_blocks value for raw_track access is 256 @@ -6555,28 +6518,6 @@ static void dasd_eckd_setup_blk_queue(struct dasd_block *block) /* With page sized segments each segment can be translated into one idaw/tidaw */ blk_queue_max_segment_size(q, PAGE_SIZE); blk_queue_segment_boundary(q, PAGE_SIZE - 1); - - if (dasd_eckd_is_ese(device)) { - /* - * Depending on the extent size, up to UINT_MAX bytes can be - * accepted. However, neither DASD_ECKD_RAS_EXTS_MAX nor the - * device limits should be exceeded. 
- */ - ext_size = dasd_eckd_ext_size(device); - ext_limit = min(private->real_cyl / ext_size, DASD_ECKD_RAS_EXTS_MAX); - ext_bytes = ext_size * trks_per_cyl * recs_per_trk * - logical_block_size; - max_bytes = UINT_MAX - (UINT_MAX % ext_bytes); - if (max_bytes / ext_bytes > ext_limit) - max_bytes = ext_bytes * ext_limit; - - max_discard_sectors = max_bytes / 512; - - blk_queue_max_discard_sectors(q, max_discard_sectors); - blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = ext_bytes; - q->limits.discard_alignment = ext_bytes; - } } static struct ccw_driver dasd_eckd_driver = { diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index ba7d2480613b..dcdaba689b20 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -113,6 +113,7 @@ struct subchannel { enum sch_todo todo; struct work_struct todo_work; struct schib_config config; + u64 dma_mask; char *driver_override; /* Driver name to force a match */ } __attribute__ ((aligned(8))); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 1fbfb0a93f5f..831850435c23 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -232,7 +232,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, * belong to a subchannel need to fit 31 bit width (e.g. ccw). */ sch->dev.coherent_dma_mask = DMA_BIT_MASK(31); - sch->dev.dma_mask = &sch->dev.coherent_dma_mask; + /* + * But we don't have such restrictions imposed on the stuff that + * is handled by the streaming API. + */ + sch->dma_mask = DMA_BIT_MASK(64); + sch->dev.dma_mask = &sch->dma_mask; return sch; err: diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 131430bd48d9..0c6245fc7706 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -710,7 +710,7 @@ static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch) if (!cdev->private) goto err_priv; cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask; - cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask; + cdev->dev.dma_mask = sch->dev.dma_mask; dma_pool = cio_gp_dma_create(&cdev->dev, 1); if (!dma_pool) goto err_dma_pool; diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index f4ca1d29d61b..cd164886132f 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -113,7 +113,7 @@ static void set_impl_params(struct qdio_irq *irq_ptr, irq_ptr->qib.pfmt = qib_param_field_format; if (qib_param_field) memcpy(irq_ptr->qib.parm, qib_param_field, - QDIO_MAX_BUFFERS_PER_Q); + sizeof(irq_ptr->qib.parm)); if (!input_slib_elements) goto output; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a7868c8133ee..dda274351c21 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4715,8 +4715,7 @@ static int qeth_qdio_establish(struct qeth_card *card) QETH_CARD_TEXT(card, 2, "qdioest"); - qib_param_field = kzalloc(QDIO_MAX_BUFFERS_PER_Q, - GFP_KERNEL); + qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL); if (!qib_param_field) { rc = -ENOMEM; goto out_free_nothing; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index b8799cd3e7aa..bd8143e51747 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2021,10 +2021,10 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, static void qeth_l2_vnicc_init(struct qeth_card *card) { u32 *timeout = &card->options.vnicc.learning_timeout; + bool enable, error = 
false; unsigned int chars_len, i; unsigned long chars_tmp; u32 sup_cmds, vnicc; - bool enable, error; QETH_CARD_TEXT(card, 2, "vniccini"); /* reset rx_bcast */ @@ -2045,17 +2045,24 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE; for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); - qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); - if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) || - !(sup_cmds & IPA_VNICC_GET_TIMEOUT)) + if (qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds)) { + sup_cmds = 0; + error = true; + } + if ((sup_cmds & IPA_VNICC_SET_TIMEOUT) && + (sup_cmds & IPA_VNICC_GET_TIMEOUT)) + card->options.vnicc.getset_timeout_sup |= vnicc; + else card->options.vnicc.getset_timeout_sup &= ~vnicc; - if (!(sup_cmds & IPA_VNICC_ENABLE) || - !(sup_cmds & IPA_VNICC_DISABLE)) + if ((sup_cmds & IPA_VNICC_ENABLE) && + (sup_cmds & IPA_VNICC_DISABLE)) + card->options.vnicc.set_char_sup |= vnicc; + else card->options.vnicc.set_char_sup &= ~vnicc; } /* enforce assumed default values and recover settings, if changed */ - error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, - timeout); + error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, + timeout); chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 296bbc3c4606..cf63916814cc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -27,6 +27,11 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; +static bool ber_stop = true; +module_param(ber_stop, bool, 0600); +MODULE_PARM_DESC(ber_stop, + "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); + static void zfcp_fsf_request_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); @@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) case FSF_STATUS_READ_SENSE_DATA_AVAIL: break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - dev_warn(&adapter->ccw_device->dev, - "The error threshold for checksum statistics " - "has been exceeded\n"); zfcp_dbf_hba_bit_err("fssrh_3", req); + if (ber_stop) { + dev_warn(&adapter->ccw_device->dev, + "All paths over this FCP device are disused because of excessive bit errors\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); + } else { + dev_warn(&adapter->ccw_device->dev, + "The error threshold for checksum statistics has been exceeded\n"); + } break; case FSF_STATUS_READ_LINK_DOWN: zfcp_fsf_status_read_link_down(req); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index da00ca5fa5dc..401743e2b429 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -1923,6 +1923,7 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, struct fcoe_fcp_rsp_payload *fcp_rsp; struct bnx2fc_rport *tgt = io_req->tgt; struct scsi_cmnd *sc_cmd; + u16 scope = 0, qualifier = 0; /* scsi_cmd_cmpl is called with tgt lock held */ @@ -1990,12 +1991,30 @@ void bnx2fc_process_scsi_cmd_compl(struct bnx2fc_cmd *io_req, if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL || io_req->cdb_status == SAM_STAT_BUSY) { - /* Set the jiffies + retry_delay_timer * 100ms - for the rport/tgt */ - tgt->retry_delay_timestamp = jiffies + - fcp_rsp->retry_delay_timer * HZ / 10; + /* Newer 
array firmware with BUSY or + * TASK_SET_FULL may return a status that needs + * the scope bits masked. + * Or a huge delay timestamp up to 27 minutes + * can result. + */ + if (fcp_rsp->retry_delay_timer) { + /* Upper 2 bits */ + scope = fcp_rsp->retry_delay_timer + & 0xC000; + /* Lower 14 bits */ + qualifier = fcp_rsp->retry_delay_timer + & 0x3FFF; + } + if (scope > 0 && qualifier > 0 && + qualifier <= 0x3FEF) { + /* Set the jiffies + + * retry_delay_timer * 100ms + * for the rport/tgt + */ + tgt->retry_delay_timestamp = jiffies + + (qualifier * HZ / 10); + } } - } if (io_req->fcp_resid) scsi_set_resid(sc_cmd, io_req->fcp_resid); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index d1513fdf1e00..0847e682797b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -3683,7 +3683,7 @@ void hisi_sas_debugfs_work_handler(struct work_struct *work) } EXPORT_SYMBOL_GPL(hisi_sas_debugfs_work_handler); -void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) { struct device *dev = hisi_hba->dev; int i; @@ -3705,7 +3705,7 @@ void hisi_sas_debugfs_release(struct hisi_hba *hisi_hba) devm_kfree(dev, hisi_hba->debugfs_port_reg[i]); } -int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) +static int hisi_sas_debugfs_alloc(struct hisi_hba *hisi_hba) { const struct hisi_sas_hw *hw = hisi_hba->hw; struct device *dev = hisi_hba->dev; @@ -3796,7 +3796,7 @@ fail: return -ENOMEM; } -void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) +static void hisi_sas_debugfs_bist_init(struct hisi_hba *hisi_hba) { hisi_hba->debugfs_bist_dentry = debugfs_create_dir("bist", hisi_hba->debugfs_dir); diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 45a66048801b..ff6d4aa92421 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4183,11 +4183,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ && pdev->subsystem_device == 0xC000) - return -ENODEV; + goto out_disable_device; /* Now check the magic signature byte */ pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic); if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE) - return -ENODEV; + goto out_disable_device; /* Ok it is probably a megaraid */ } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 1659d35cd37b..59ca98f12afd 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -596,7 +596,7 @@ static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) tmp_prio = get->operational.app_prio.fcoe; if (qedf_default_prio > -1) qedf->prio = qedf_default_prio; - else if (tmp_prio < 0 || tmp_prio > 7) { + else if (tmp_prio > 7) { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "FIP/FCoE prio %d out of range, setting to %d.\n", tmp_prio, QEDF_DEFAULT_PRIO); diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 8190c2a27584..30bafd9d21e9 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2920,6 +2920,8 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) struct qla_hw_data *ha = vha->hw; uint16_t id = vha->vp_idx; + set_bit(VPORT_DELETE, &vha->dpc_flags); + while (test_bit(LOOP_RESYNC_ACTIVE, &vha->dpc_flags) || test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags)) msleep(1000); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 
873a6aef1c5c..6ffa9877c28b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2396,6 +2396,7 @@ typedef struct fc_port { unsigned int query:1; unsigned int id_changed:1; unsigned int scan_needed:1; + unsigned int n2n_flag:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; @@ -2446,7 +2447,6 @@ typedef struct fc_port { uint8_t fc4_type; uint8_t fc4f_nvme; uint8_t scan_state; - uint8_t n2n_flag; unsigned long last_queue_full; unsigned long last_ramp_up; @@ -3036,6 +3036,7 @@ enum scan_flags_t { enum fc4type_t { FS_FC4TYPE_FCP = BIT_0, FS_FC4TYPE_NVME = BIT_1, + FS_FCP_IS_N2N = BIT_7, }; struct fab_scan_rp { @@ -4394,6 +4395,7 @@ typedef struct scsi_qla_host { #define IOCB_WORK_ACTIVE 31 #define SET_ZIO_THRESHOLD_NEEDED 32 #define ISP_ABORT_TO_ROM 33 +#define VPORT_DELETE 34 unsigned long pci_flags; #define PFLG_DISCONNECTED 0 /* PCI device removed */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index dc0e36676313..5298ed10059f 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3102,7 +3102,8 @@ int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id) { struct qla_work_evt *e; - if (test_bit(UNLOADING, &vha->dpc_flags)) + if (test_bit(UNLOADING, &vha->dpc_flags) || + (vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags))) return 0; e = qla2x00_alloc_work(vha, QLA_EVT_GPNID); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 643d2324082e..1d041313ec52 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -746,12 +746,15 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, break; default: if ((id.b24 != fcport->d_id.b24 && - fcport->d_id.b24) || + fcport->d_id.b24 && + fcport->loop_id != FC_NO_LOOP_ID) || (fcport->loop_id != FC_NO_LOOP_ID && fcport->loop_id != loop_id)) { ql_dbg(ql_dbg_disc, vha, 0x20e3, "%s %d %8phC post del sess\n", __func__, __LINE__, fcport->port_name); + if (fcport->n2n_flag) + fcport->d_id.b24 = 0; qlt_schedule_sess_for_deletion(fcport); return; } @@ -759,6 +762,8 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, } fcport->loop_id = loop_id; + if (fcport->n2n_flag) + fcport->d_id.b24 = id.b24; wwn = wwn_to_u64(fcport->port_name); qlt_find_sess_invalidate_other(vha, wwn, @@ -972,7 +977,7 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) wwn = wwn_to_u64(e->port_name); ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0x20e8, - "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n", + "%s %8phC %02x:%02x:%02x CLS %x/%x lid %x \n", __func__, (void *)&wwn, e->port_id[2], e->port_id[1], e->port_id[0], e->current_login_state, e->last_login_state, (loop_id & 0x7fff)); @@ -1499,7 +1504,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) (fcport->fw_login_state == DSC_LS_PRLI_PEND))) return 0; - if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP && + !N2N_TOPO(vha->hw)) { if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) { set_bit(RELOGIN_NEEDED, &vha->dpc_flags); return 0; @@ -1570,8 +1576,9 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) qla24xx_post_gpdb_work(vha, fcport, 0); } else { ql_dbg(ql_dbg_disc, vha, 0x2118, - "%s %d %8phC post NVMe PRLI\n", - __func__, __LINE__, fcport->port_name); + "%s %d %8phC post %s PRLI\n", + __func__, __LINE__, fcport->port_name, + fcport->fc4f_nvme ? 
"NVME" : "FC"); qla24xx_post_prli_work(vha, fcport); } break; @@ -1853,17 +1860,38 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) break; } - if (ea->fcport->n2n_flag) { + if (ea->fcport->fc4f_nvme) { ql_dbg(ql_dbg_disc, vha, 0x2118, "%s %d %8phC post fc4 prli\n", __func__, __LINE__, ea->fcport->port_name); ea->fcport->fc4f_nvme = 0; - ea->fcport->n2n_flag = 0; qla24xx_post_prli_work(vha, ea->fcport); + return; + } + + /* at this point both PRLI NVME & PRLI FCP failed */ + if (N2N_TOPO(vha->hw)) { + if (ea->fcport->n2n_link_reset_cnt < 3) { + ea->fcport->n2n_link_reset_cnt++; + /* + * remote port is not sending Plogi. Reset + * link to kick start his state machine + */ + set_bit(N2N_LINK_RESET, &vha->dpc_flags); + } else { + ql_log(ql_log_warn, vha, 0x2119, + "%s %d %8phC Unable to reconnect\n", + __func__, __LINE__, ea->fcport->port_name); + } + } else { + /* + * switch connect. login failed. Take connection + * down and allow relogin to retrigger + */ + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->keep_nport_handle = 0; + qlt_schedule_sess_for_deletion(ea->fcport); } - ql_dbg(ql_dbg_disc, vha, 0x2119, - "%s %d %8phC unhandle event of %x\n", - __func__, __LINE__, ea->fcport->port_name, ea->data[0]); break; } } @@ -3190,7 +3218,7 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) for (j = 0; j < 2; j++, fwdt++) { if (!fwdt->template) { - ql_log(ql_log_warn, vha, 0x00ba, + ql_dbg(ql_dbg_init, vha, 0x00ba, "-> fwdt%u no template\n", j); continue; } @@ -4986,28 +5014,47 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) unsigned long flags; /* Inititae N2N login. */ - if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { - /* borrowing */ - u32 *bp, i, sz; - - memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct els_plogi_payload), - ha->init_cb_size); - rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, - (void *)ha->init_cb, sz); - if (rval == QLA_SUCCESS) { - bp = (uint32_t *)ha->init_cb; - for (i = 0; i < sz/4 ; i++, bp++) - *bp = cpu_to_be32(*bp); + if (N2N_TOPO(ha)) { + if (test_and_clear_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags)) { + /* borrowing */ + u32 *bp, i, sz; + + memset(ha->init_cb, 0, ha->init_cb_size); + sz = min_t(int, sizeof(struct els_plogi_payload), + ha->init_cb_size); + rval = qla24xx_get_port_login_templ(vha, + ha->init_cb_dma, (void *)ha->init_cb, sz); + if (rval == QLA_SUCCESS) { + bp = (uint32_t *)ha->init_cb; + for (i = 0; i < sz/4 ; i++, bp++) + *bp = cpu_to_be32(*bp); - memcpy(&ha->plogi_els_payld.data, (void *)ha->init_cb, - sizeof(ha->plogi_els_payld.data)); - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - } else { - ql_dbg(ql_dbg_init, vha, 0x00d1, - "PLOGI ELS param read fail.\n"); + memcpy(&ha->plogi_els_payld.data, + (void *)ha->init_cb, + sizeof(ha->plogi_els_payld.data)); + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + } else { + ql_dbg(ql_dbg_init, vha, 0x00d1, + "PLOGI ELS param read fail.\n"); + goto skip_login; + } + } + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->n2n_flag) { + qla24xx_fcport_handle_login(vha, fcport); + return QLA_SUCCESS; + } + } +skip_login: + spin_lock_irqsave(&vha->work_lock, flags); + vha->scan.scan_retry++; + spin_unlock_irqrestore(&vha->work_lock, flags); + + if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } - return QLA_SUCCESS; } found_devs = 0; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 
e92e52aa6e9b..518eb954cf42 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2656,9 +2656,10 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->port_id[0] = sp->fcport->d_id.b.al_pa; els_iocb->port_id[1] = sp->fcport->d_id.b.area; els_iocb->port_id[2] = sp->fcport->d_id.b.domain; - els_iocb->s_id[0] = vha->d_id.b.al_pa; - els_iocb->s_id[1] = vha->d_id.b.area; - els_iocb->s_id[2] = vha->d_id.b.domain; + /* For SID the byte order is different than DID */ + els_iocb->s_id[1] = vha->d_id.b.al_pa; + els_iocb->s_id[2] = vha->d_id.b.area; + els_iocb->s_id[0] = vha->d_id.b.domain; if (elsio->u.els_logo.els_cmd == ELS_DCMD_PLOGI) { els_iocb->control_flags = 0; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 4c26630c1c3e..009fd5a33fcd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2837,8 +2837,6 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) if (sense_len == 0) { rsp->status_srb = NULL; sp->done(sp, cp->result); - } else { - WARN_ON_ONCE(true); } } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4c858e2d0ea8..1cc6913f76c4 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2249,7 +2249,7 @@ qla2x00_lip_reset(scsi_qla_host_t *vha) mbx_cmd_t mc; mbx_cmd_t *mcp = &mc; - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x105a, + ql_dbg(ql_dbg_disc, vha, 0x105a, "Entered %s.\n", __func__); if (IS_CNA_CAPABLE(vha->hw)) { @@ -3883,14 +3883,24 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, case TOPO_N2N: ha->current_topology = ISP_CFG_N; spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; + } + fcport = qla2x00_find_fcport_by_wwpn(vha, rptid_entry->u.f1.port_name, 1); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); if (fcport) { fcport->plogi_nack_done_deadline = jiffies + HZ; - fcport->dm_login_expire = jiffies + 3*HZ; + fcport->dm_login_expire = jiffies + 2*HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->n2n_flag = 1; + fcport->keep_nport_handle = 1; + if (vha->flags.nvme_enabled) + fcport->fc4f_nvme = 1; + switch (fcport->disc_state) { case DSC_DELETED: set_bit(RELOGIN_NEEDED, @@ -3924,7 +3934,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->u.f1.port_name, rptid_entry->u.f1.node_name, NULL, - FC4_TYPE_UNKNOWN); + FS_FCP_IS_N2N); } /* if our portname is higher then initiate N2N login */ @@ -4023,6 +4033,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, list_for_each_entry(fcport, &vha->vp_fcports, list) { fcport->scan_state = QLA_FCPORT_SCAN; + fcport->n2n_flag = 0; } fcport = qla2x00_find_fcport_by_wwpn(vha, @@ -4032,6 +4043,14 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, fcport->login_retry = vha->hw->login_retry_count; fcport->plogi_nack_done_deadline = jiffies + HZ; fcport->scan_state = QLA_FCPORT_FOUND; + fcport->keep_nport_handle = 1; + fcport->n2n_flag = 1; + fcport->d_id.b.domain = + rptid_entry->u.f2.remote_nport_id[2]; + fcport->d_id.b.area = + rptid_entry->u.f2.remote_nport_id[1]; + fcport->d_id.b.al_pa = + rptid_entry->u.f2.remote_nport_id[0]; } } } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 1a9a11ae7285..6afad68e5ba2 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -66,6 +66,7 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t 
*vha) uint16_t vp_id; struct qla_hw_data *ha = vha->hw; unsigned long flags = 0; + u8 i; mutex_lock(&ha->vport_lock); /* @@ -75,8 +76,9 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - wait_event_timeout(vha->vref_waitq, !atomic_read(&vha->vref_count), - 10*HZ); + for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++) + wait_event_timeout(vha->vref_waitq, + atomic_read(&vha->vref_count), HZ); spin_lock_irqsave(&ha->vport_slock, flags); if (atomic_read(&vha->vref_count)) { @@ -262,6 +264,9 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); list_for_each_entry(vha, &ha->vp_list, list) { if (vha->vp_idx) { + if (test_bit(VPORT_DELETE, &vha->dpc_flags)) + continue; + atomic_inc(&vha->vref_count); spin_unlock_irqrestore(&ha->vport_slock, flags); @@ -300,6 +305,20 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) int qla2x00_vp_abort_isp(scsi_qla_host_t *vha) { + fc_port_t *fcport; + + /* + * To exclusively reset vport, we need to log it out first. + * Note: This control_vp can fail if ISP reset is already + * issued, this is expected, as the vp would be already + * logged out due to ISP reset. + */ + if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) { + qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); + list_for_each_entry(fcport, &vha->vp_fcports, list) + fcport->logout_on_delete = 0; + } + /* * Physical port will do most of the abort and recovery work. We can * just treat it as a loop down @@ -312,16 +331,9 @@ qla2x00_vp_abort_isp(scsi_qla_host_t *vha) atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); } - /* - * To exclusively reset vport, we need to log it out first. Note: this - * control_vp can fail if ISP reset is already issued, this is - * expected, as the vp would be already logged out due to ISP reset. 
- */ - if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) - qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); - ql_dbg(ql_dbg_taskm, vha, 0x801d, "Scheduling enable of Vport %d.\n", vha->vp_idx); + return qla24xx_enable_vp(vha); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 73db01e3b4e4..3568031c6504 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1115,9 +1115,15 @@ static inline int test_fcport_count(scsi_qla_host_t *vha) void qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) { + u8 i; + qla2x00_mark_all_devices_lost(vha, 0); - wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), 10*HZ); + for (i = 0; i < 10; i++) + wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha), + HZ); + + flush_workqueue(vha->hw->wq); } /* @@ -5036,6 +5042,10 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) memcpy(fcport->port_name, e->u.new_sess.port_name, WWN_SIZE); + + if (e->u.new_sess.fc4_type & FS_FCP_IS_N2N) + fcport->n2n_flag = 1; + } else { ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %8phC mem alloc fail.\n", @@ -5134,11 +5144,9 @@ void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) if (dfcp) qlt_schedule_sess_for_deletion(tfcp); - - if (N2N_TOPO(vha->hw)) - fcport->flags &= ~FCF_FABRIC_DEVICE; - if (N2N_TOPO(vha->hw)) { + fcport->flags &= ~FCF_FABRIC_DEVICE; + fcport->keep_nport_handle = 1; if (vha->flags.nvme_enabled) { fcport->fc4f_nvme = 1; fcport->n2n_flag = 1; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0ffda6171614..a06e56224a55 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -953,7 +953,7 @@ void qlt_free_session_done(struct work_struct *work) struct qla_hw_data *ha = vha->hw; unsigned long flags; bool logout_started = false; - scsi_qla_host_t *base_vha; + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; @@ -1020,6 +1020,7 @@ void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; + u16 cnt = 0; while (!READ_ONCE(sess->logout_completed)) { if (!traced) { @@ -1029,6 +1030,9 @@ void qlt_free_session_done(struct work_struct *work) traced = true; } msleep(100); + cnt++; + if (cnt > 200) + break; } ql_dbg(ql_dbg_disc, vha, 0xf087, @@ -1101,6 +1105,7 @@ void qlt_free_session_done(struct work_struct *work) } spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + sess->free_pending = 0; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p %8phC finished fcp_cnt %d\n", @@ -1109,17 +1114,9 @@ void qlt_free_session_done(struct work_struct *work) if (tgt && (tgt->sess_count == 0)) wake_up_all(&tgt->waitQ); - if (vha->fcport_count == 0) - wake_up_all(&vha->fcport_waitQ); - - base_vha = pci_get_drvdata(ha->pdev); - - sess->free_pending = 0; - - if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags)) - return; - - if ((!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { + if (!test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags) && + !(vha->vp_idx && test_bit(VPORT_DELETE, &vha->dpc_flags)) && + (!tgt || !tgt->tgt_stop) && !LOOP_TRANSITION(vha)) { switch (vha->host->active_mode) { case MODE_INITIATOR: case MODE_DUAL: @@ -1132,6 +1129,9 @@ void qlt_free_session_done(struct work_struct *work) break; } } + + if (vha->fcport_count == 0) + wake_up_all(&vha->fcport_waitQ); } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -1161,7 +1161,7 @@ void 
qlt_unreg_sess(struct fc_port *sess) sess->last_login_gen = sess->login_gen; INIT_WORK(&sess->free_work, qlt_free_session_done); - schedule_work(&sess->free_work); + queue_work(sess->vha->hw->wq, &sess->free_work); } EXPORT_SYMBOL(qlt_unreg_sess); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 1c470e31ae81..ae2fa170f6ad 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -967,6 +967,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, ses->data_direction = scmd->sc_data_direction; ses->sdb = scmd->sdb; ses->result = scmd->result; + ses->resid_len = scmd->req.resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; @@ -977,6 +978,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, memset(scmd->cmnd, 0, BLK_MAX_CDB); memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->result = 0; + scmd->req.resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -1029,6 +1031,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) scmd->sc_data_direction = ses->data_direction; scmd->sdb = ses->sdb; scmd->result = ses->result; + scmd->req.resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index dc210b9d4896..5447738906ac 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1834,6 +1834,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = { .init_request = scsi_mq_init_request, .exit_request = scsi_mq_exit_request, .initialize_rq_fn = scsi_initialize_rq, + .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, }; @@ -1921,7 +1922,8 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; - if (q->mq_ops == &scsi_mq_ops) + if (q->mq_ops == &scsi_mq_ops_no_commit || + q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 50928bc266eb..03163ac5fe95 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1654,7 +1654,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) /* we need to evaluate the error return */ if (scsi_sense_valid(sshdr) && (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20)) /* invalid command */ + sshdr->asc == 0x20 || /* invalid command */ + (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */ /* this is no error here */ return 0; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ed8b9ac805e6..542d2bac2922 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1837,8 +1837,7 @@ static int storvsc_probe(struct hv_device *device, /* * Set the number of HW queues we are supporting. */ - if (stor_device->num_sc != 0) - host->nr_hw_queues = stor_device->num_sc + 1; + host->nr_hw_queues = num_present_cpus(); /* * Set the error handler work queue. 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 034dd9cb9ec8..11a87f51c442 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8143,6 +8143,9 @@ int ufshcd_shutdown(struct ufs_hba *hba) { int ret = 0; + if (!hba->is_powered) + goto out; + if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; diff --git a/drivers/staging/exfat/Kconfig b/drivers/staging/exfat/Kconfig index 290dbfc7ace1..ce32dfe33bec 100644 --- a/drivers/staging/exfat/Kconfig +++ b/drivers/staging/exfat/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 config EXFAT_FS tristate "exFAT fs support" depends on BLOCK @@ -6,7 +7,7 @@ config EXFAT_FS This adds support for the exFAT file system. config EXFAT_DONT_MOUNT_VFAT - bool "Prohibit mounting of fat/vfat filesysems by exFAT" + bool "Prohibit mounting of fat/vfat filesystems by exFAT" depends on EXFAT_FS default y help diff --git a/drivers/staging/exfat/Makefile b/drivers/staging/exfat/Makefile index 84944dfbae28..6c90aec83feb 100644 --- a/drivers/staging/exfat/Makefile +++ b/drivers/staging/exfat/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: GPL-2.0-or-later obj-$(CONFIG_EXFAT_FS) += exfat.o diff --git a/drivers/staging/exfat/exfat.h b/drivers/staging/exfat/exfat.h index 6c12f2d79f4d..3abab33e932c 100644 --- a/drivers/staging/exfat/exfat.h +++ b/drivers/staging/exfat/exfat.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_blkdev.c b/drivers/staging/exfat/exfat_blkdev.c index f086c75e7076..81d20e6241c6 100644 --- a/drivers/staging/exfat/exfat_blkdev.c +++ b/drivers/staging/exfat/exfat_blkdev.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_cache.c b/drivers/staging/exfat/exfat_cache.c index 1565ce65d39f..e1b001718709 100644 --- a/drivers/staging/exfat/exfat_cache.c +++ b/drivers/staging/exfat/exfat_cache.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_core.c b/drivers/staging/exfat/exfat_core.c index b3e9cf725cf5..79174e5c4145 100644 --- a/drivers/staging/exfat/exfat_core.c +++ b/drivers/staging/exfat/exfat_core.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_nls.c b/drivers/staging/exfat/exfat_nls.c index 03cb8290b5d2..a5c4b68925fb 100644 --- a/drivers/staging/exfat/exfat_nls.c +++ b/drivers/staging/exfat/exfat_nls.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/exfat/exfat_super.c b/drivers/staging/exfat/exfat_super.c index 5f6caee819a6..3b2b0ceb7297 100644 --- a/drivers/staging/exfat/exfat_super.c +++ b/drivers/staging/exfat/exfat_super.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. 
*/ @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/time.h> #include <linux/slab.h> +#include <linux/mm.h> #include <linux/seq_file.h> #include <linux/pagemap.h> #include <linux/mpage.h> @@ -3450,7 +3451,7 @@ static void exfat_free_super(struct exfat_sb_info *sbi) kfree(sbi->options.iocharset); /* mutex_init is in exfat_fill_super function. only for 3.7+ */ mutex_destroy(&sbi->s_lock); - kfree(sbi); + kvfree(sbi); } static void exfat_put_super(struct super_block *sb) @@ -3845,7 +3846,7 @@ static int exfat_fill_super(struct super_block *sb, void *data, int silent) * the filesystem, since we're only just about to mount * it and have no inodes etc active! */ - sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL); + sbi = kvzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) return -ENOMEM; mutex_init(&sbi->s_lock); diff --git a/drivers/staging/exfat/exfat_upcase.c b/drivers/staging/exfat/exfat_upcase.c index 366082fb3dab..b91a1faa0e50 100644 --- a/drivers/staging/exfat/exfat_upcase.c +++ b/drivers/staging/exfat/exfat_upcase.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd. */ diff --git a/drivers/staging/fbtft/Kconfig b/drivers/staging/fbtft/Kconfig index 8ec524a95ec8..cb61c2a772bd 100644 --- a/drivers/staging/fbtft/Kconfig +++ b/drivers/staging/fbtft/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 menuconfig FB_TFT tristate "Support for small TFT LCD display modules" - depends on FB && SPI + depends on FB && SPI && OF depends on GPIOLIB || COMPILE_TEST select FB_SYS_FILLRECT select FB_SYS_COPYAREA @@ -199,13 +199,3 @@ config FB_TFT_WATTEROTT depends on FB_TFT help Generic Framebuffer support for WATTEROTT - -config FB_FLEX - tristate "Generic FB driver for TFT LCD displays" - depends on FB_TFT - help - Generic Framebuffer support for TFT LCD displays. 
- -config FB_TFT_FBTFT_DEVICE - tristate "Module to for adding FBTFT devices" - depends on FB_TFT diff --git a/drivers/staging/fbtft/Makefile b/drivers/staging/fbtft/Makefile index 6bc03311c9c7..27af43f32f81 100644 --- a/drivers/staging/fbtft/Makefile +++ b/drivers/staging/fbtft/Makefile @@ -36,7 +36,3 @@ obj-$(CONFIG_FB_TFT_UC1611) += fb_uc1611.o obj-$(CONFIG_FB_TFT_UC1701) += fb_uc1701.o obj-$(CONFIG_FB_TFT_UPD161704) += fb_upd161704.o obj-$(CONFIG_FB_TFT_WATTEROTT) += fb_watterott.o -obj-$(CONFIG_FB_FLEX) += flexfb.o - -# Device modules -obj-$(CONFIG_FB_TFT_FBTFT_DEVICE) += fbtft_device.o diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index cf5700a2ea66..a0a67aa517f0 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -714,7 +714,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, if (par->gamma.curves && gamma) { if (fbtft_gamma_parse_str(par, par->gamma.curves, gamma, strlen(gamma))) - goto alloc_fail; + goto release_framebuf; } /* Transmit buffer */ @@ -731,7 +731,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, if (txbuflen > 0) { txbuf = devm_kzalloc(par->info->device, txbuflen, GFP_KERNEL); if (!txbuf) - goto alloc_fail; + goto release_framebuf; par->txbuf.buf = txbuf; par->txbuf.len = txbuflen; } @@ -753,6 +753,9 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, return info; +release_framebuf: + framebuffer_release(info); + alloc_fail: vfree(vmem); diff --git a/drivers/staging/fbtft/fbtft_device.c b/drivers/staging/fbtft/fbtft_device.c deleted file mode 100644 index 44e1410eb3fe..000000000000 --- a/drivers/staging/fbtft/fbtft_device.c +++ /dev/null @@ -1,1261 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * - * Copyright (C) 2013, Noralf Tronnes - */ - -#define pr_fmt(fmt) "fbtft_device: " fmt -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/gpio/consumer.h> -#include <linux/spi/spi.h> -#include <video/mipi_display.h> - -#include "fbtft.h" - -#define MAX_GPIOS 32 - -static struct spi_device *spi_device; -static struct platform_device *p_device; - -static char *name; -module_param(name, charp, 0000); -MODULE_PARM_DESC(name, - "Devicename (required). name=list => list all supported devices."); - -static unsigned int rotate; -module_param(rotate, uint, 0000); -MODULE_PARM_DESC(rotate, - "Angle to rotate display counter clockwise: 0, 90, 180, 270"); - -static unsigned int busnum; -module_param(busnum, uint, 0000); -MODULE_PARM_DESC(busnum, "SPI bus number (default=0)"); - -static unsigned int cs; -module_param(cs, uint, 0000); -MODULE_PARM_DESC(cs, "SPI chip select (default=0)"); - -static unsigned int speed; -module_param(speed, uint, 0000); -MODULE_PARM_DESC(speed, "SPI speed (override device default)"); - -static int mode = -1; -module_param(mode, int, 0000); -MODULE_PARM_DESC(mode, "SPI mode (override device default)"); - -static unsigned int fps; -module_param(fps, uint, 0000); -MODULE_PARM_DESC(fps, "Frames per second (override driver default)"); - -static char *gamma; -module_param(gamma, charp, 0000); -MODULE_PARM_DESC(gamma, - "String representation of Gamma Curve(s). 
Driver specific."); - -static int txbuflen; -module_param(txbuflen, int, 0000); -MODULE_PARM_DESC(txbuflen, "txbuflen (override driver default)"); - -static int bgr = -1; -module_param(bgr, int, 0000); -MODULE_PARM_DESC(bgr, - "BGR bit (supported by some drivers)."); - -static unsigned int startbyte; -module_param(startbyte, uint, 0000); -MODULE_PARM_DESC(startbyte, "Sets the Start byte used by some SPI displays."); - -static bool custom; -module_param(custom, bool, 0000); -MODULE_PARM_DESC(custom, "Add a custom display device. Use speed= argument to make it a SPI device, else platform_device"); - -static unsigned int width; -module_param(width, uint, 0000); -MODULE_PARM_DESC(width, "Display width, used with the custom argument"); - -static unsigned int height; -module_param(height, uint, 0000); -MODULE_PARM_DESC(height, "Display height, used with the custom argument"); - -static unsigned int buswidth = 8; -module_param(buswidth, uint, 0000); -MODULE_PARM_DESC(buswidth, "Display bus width, used with the custom argument"); - -static s16 init[FBTFT_MAX_INIT_SEQUENCE]; -static int init_num; -module_param_array(init, short, &init_num, 0000); -MODULE_PARM_DESC(init, "Init sequence, used with the custom argument"); - -static unsigned long debug; -module_param(debug, ulong, 0000); -MODULE_PARM_DESC(debug, - "level: 0-7 (the remaining 29 bits is for advanced usage)"); - -static unsigned int verbose = 3; -module_param(verbose, uint, 0000); -MODULE_PARM_DESC(verbose, - "0 silent, >1 show devices, >2 show devices before (default=3)"); - -struct fbtft_device_display { - char *name; - struct spi_board_info *spi; - struct platform_device *pdev; -}; - -static void fbtft_device_pdev_release(struct device *dev); - -static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len); -static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par, - int xs, int ys, int xe, int ye); - -#define ADAFRUIT18_GAMMA \ - "02 1c 07 12 37 32 29 2d 29 25 2B 39 00 01 03 10\n" \ - "03 1d 07 06 2E 2C 29 2D 2E 2E 37 3F 00 00 02 10" - -#define CBERRY28_GAMMA \ - "D0 00 14 15 13 2C 42 43 4E 09 16 14 18 21\n" \ - "D0 00 14 15 13 0B 43 55 53 0C 17 14 23 20" - -static const s16 cberry28_init_sequence[] = { - /* turn off sleep mode */ - -1, MIPI_DCS_EXIT_SLEEP_MODE, - -2, 120, - - /* set pixel format to RGB-565 */ - -1, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT, - - -1, 0xB2, 0x0C, 0x0C, 0x00, 0x33, 0x33, - - /* - * VGH = 13.26V - * VGL = -10.43V - */ - -1, 0xB7, 0x35, - - /* - * VDV and VRH register values come from command write - * (instead of NVM) - */ - -1, 0xC2, 0x01, 0xFF, - - /* - * VAP = 4.7V + (VCOM + VCOM offset + 0.5 * VDV) - * VAN = -4.7V + (VCOM + VCOM offset + 0.5 * VDV) - */ - -1, 0xC3, 0x17, - - /* VDV = 0V */ - -1, 0xC4, 0x20, - - /* VCOM = 0.675V */ - -1, 0xBB, 0x17, - - /* VCOM offset = 0V */ - -1, 0xC5, 0x20, - - /* - * AVDD = 6.8V - * AVCL = -4.8V - * VDS = 2.3V - */ - -1, 0xD0, 0xA4, 0xA1, - - -1, MIPI_DCS_SET_DISPLAY_ON, - - -3, -}; - -static const s16 hy28b_init_sequence[] = { - -1, 0x00e7, 0x0010, -1, 0x0000, 0x0001, - -1, 0x0001, 0x0100, -1, 0x0002, 0x0700, - -1, 0x0003, 0x1030, -1, 0x0004, 0x0000, - -1, 0x0008, 0x0207, -1, 0x0009, 0x0000, - -1, 0x000a, 0x0000, -1, 0x000c, 0x0001, - -1, 0x000d, 0x0000, -1, 0x000f, 0x0000, - -1, 0x0010, 0x0000, -1, 0x0011, 0x0007, - -1, 0x0012, 0x0000, -1, 0x0013, 0x0000, - -2, 50, -1, 0x0010, 0x1590, -1, 0x0011, - 0x0227, -2, 50, -1, 0x0012, 0x009c, -2, 50, - -1, 0x0013, 0x1900, -1, 0x0029, 0x0023, - -1, 0x002b, 0x000e, -2, 50, - -1, 0x0020, 
0x0000, -1, 0x0021, 0x0000, - -2, 50, -1, 0x0050, 0x0000, - -1, 0x0051, 0x00ef, -1, 0x0052, 0x0000, - -1, 0x0053, 0x013f, -1, 0x0060, 0xa700, - -1, 0x0061, 0x0001, -1, 0x006a, 0x0000, - -1, 0x0080, 0x0000, -1, 0x0081, 0x0000, - -1, 0x0082, 0x0000, -1, 0x0083, 0x0000, - -1, 0x0084, 0x0000, -1, 0x0085, 0x0000, - -1, 0x0090, 0x0010, -1, 0x0092, 0x0000, - -1, 0x0093, 0x0003, -1, 0x0095, 0x0110, - -1, 0x0097, 0x0000, -1, 0x0098, 0x0000, - -1, 0x0007, 0x0133, -1, 0x0020, 0x0000, - -1, 0x0021, 0x0000, -2, 100, -3 }; - -#define HY28B_GAMMA \ - "04 1F 4 7 7 0 7 7 6 0\n" \ - "0F 00 1 7 4 0 0 0 6 7" - -static const s16 pitft_init_sequence[] = { - -1, MIPI_DCS_SOFT_RESET, - -2, 5, - -1, MIPI_DCS_SET_DISPLAY_OFF, - -1, 0xEF, 0x03, 0x80, 0x02, - -1, 0xCF, 0x00, 0xC1, 0x30, - -1, 0xED, 0x64, 0x03, 0x12, 0x81, - -1, 0xE8, 0x85, 0x00, 0x78, - -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02, - -1, 0xF7, 0x20, - -1, 0xEA, 0x00, 0x00, - -1, 0xC0, 0x23, - -1, 0xC1, 0x10, - -1, 0xC5, 0x3E, 0x28, - -1, 0xC7, 0x86, - -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55, - -1, 0xB1, 0x00, 0x18, - -1, 0xB6, 0x08, 0x82, 0x27, - -1, 0xF2, 0x00, - -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01, - -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E, - 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00, - -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31, - 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F, - -1, MIPI_DCS_EXIT_SLEEP_MODE, - -2, 100, - -1, MIPI_DCS_SET_DISPLAY_ON, - -2, 20, - -3 -}; - -static const s16 waveshare32b_init_sequence[] = { - -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02, - -1, 0xCF, 0x00, 0xC1, 0x30, - -1, 0xE8, 0x85, 0x00, 0x78, - -1, 0xEA, 0x00, 0x00, - -1, 0xED, 0x64, 0x03, 0x12, 0x81, - -1, 0xF7, 0x20, - -1, 0xC0, 0x23, - -1, 0xC1, 0x10, - -1, 0xC5, 0x3E, 0x28, - -1, 0xC7, 0x86, - -1, MIPI_DCS_SET_ADDRESS_MODE, 0x28, - -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55, - -1, 0xB1, 0x00, 0x18, - -1, 0xB6, 0x08, 0x82, 0x27, - -1, 0xF2, 0x00, - -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01, - -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E, - 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00, - -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31, - 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F, - -1, MIPI_DCS_EXIT_SLEEP_MODE, - -2, 120, - -1, MIPI_DCS_SET_DISPLAY_ON, - -1, MIPI_DCS_WRITE_MEMORY_START, - -3 -}; - -#define PIOLED_GAMMA "0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 " \ - "2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 " \ - "3 3 3 4 4 4 4 4 4 4 4 4 4 4 4" - -/* Supported displays in alphabetical order */ -static struct fbtft_device_display displays[] = { - { - .name = "adafruit18", - .spi = &(struct spi_board_info) { - .modalias = "fb_st7735r", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .gamma = ADAFRUIT18_GAMMA, - } - } - }, { - .name = "adafruit18_green", - .spi = &(struct spi_board_info) { - .modalias = "fb_st7735r", - .max_speed_hz = 4000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .fbtftops.set_addr_win = - adafruit18_green_tab_set_addr_win, - }, - .bgr = true, - .gamma = ADAFRUIT18_GAMMA, - } - } - }, { - .name = "adafruit22", - .spi = &(struct spi_board_info) { - .modalias = "fb_hx8340bn", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 9, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "adafruit22a", - .spi = &(struct spi_board_info) { - .modalias = 
"fb_ili9340", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "adafruit28", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9341", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "adafruit13m", - .spi = &(struct spi_board_info) { - .modalias = "fb_ssd1306", - .max_speed_hz = 16000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "admatec_c-berry28", - .spi = &(struct spi_board_info) { - .modalias = "fb_st7789v", - .max_speed_hz = 48000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .init_sequence = cberry28_init_sequence, - }, - .gamma = CBERRY28_GAMMA, - } - } - }, { - .name = "agm1264k-fl", - .pdev = &(struct platform_device) { - .name = "fb_agm1264k-fl", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = FBTFT_ONBOARD_BACKLIGHT, - }, - }, - } - } - }, { - .name = "dogs102", - .spi = &(struct spi_board_info) { - .modalias = "fb_uc1701", - .max_speed_hz = 8000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - .bgr = true, - } - } - }, { - .name = "er_tftm050_2", - .spi = &(struct spi_board_info) { - .modalias = "fb_ra8875", - .max_speed_hz = 5000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .width = 480, - .height = 272, - }, - .bgr = true, - } - } - }, { - .name = "er_tftm070_5", - .spi = &(struct spi_board_info) { - .modalias = "fb_ra8875", - .max_speed_hz = 5000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .width = 800, - .height = 480, - }, - .bgr = true, - } - } - }, { - .name = "ew24ha0", - .spi = &(struct spi_board_info) { - .modalias = "fb_uc1611", - .max_speed_hz = 32000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "ew24ha0_9bit", - .spi = &(struct spi_board_info) { - .modalias = "fb_uc1611", - .max_speed_hz = 32000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 9, - }, - } - } - }, { - .name = "flexfb", - .spi = &(struct spi_board_info) { - .modalias = "flexfb", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - } - }, { - .name = "flexpfb", - .pdev = &(struct platform_device) { - .name = "flexpfb", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - } - } - }, { - .name = "freetronicsoled128", - .spi = &(struct spi_board_info) { - .modalias = "fb_ssd1351", - .max_speed_hz = 20000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = FBTFT_ONBOARD_BACKLIGHT, - }, - .bgr = true, - } - } - }, { - .name = "hx8353d", - .spi = &(struct spi_board_info) { - .modalias = "fb_hx8353d", - .max_speed_hz = 16000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - } - } - }, { 
- .name = "hy28a", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9320", - .max_speed_hz = 32000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .startbyte = 0x70, - .bgr = true, - } - } - }, { - .name = "hy28b", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9325", - .max_speed_hz = 48000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .init_sequence = hy28b_init_sequence, - }, - .startbyte = 0x70, - .bgr = true, - .fps = 50, - .gamma = HY28B_GAMMA, - } - } - }, { - .name = "ili9481", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9481", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .regwidth = 16, - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "itdb24", - .pdev = &(struct platform_device) { - .name = "fb_s6d1121", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = false, - }, - } - } - }, { - .name = "itdb28", - .pdev = &(struct platform_device) { - .name = "fb_ili9325", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - }, - } - } - }, { - .name = "itdb28_spi", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9325", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "mi0283qt-2", - .spi = &(struct spi_board_info) { - .modalias = "fb_hx8347d", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .startbyte = 0x70, - .bgr = true, - } - } - }, { - .name = "mi0283qt-9a", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9341", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 9, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "mi0283qt-v2", - .spi = &(struct spi_board_info) { - .modalias = "fb_watterott", - .max_speed_hz = 4000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - } - } - }, { - .name = "nokia3310", - .spi = &(struct spi_board_info) { - .modalias = "fb_pcd8544", - .max_speed_hz = 400000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "nokia3310a", - .spi = &(struct spi_board_info) { - .modalias = "fb_tls8204", - .max_speed_hz = 1000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "nokia5110", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9163", - .max_speed_hz = 12000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "piscreen", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9486", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .regwidth = 16, - .buswidth = 8, 
- .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "pitft", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9340", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .chip_select = 0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .init_sequence = pitft_init_sequence, - }, - .bgr = true, - } - } - }, { - .name = "pioled", - .spi = &(struct spi_board_info) { - .modalias = "fb_ssd1351", - .max_speed_hz = 20000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - .bgr = true, - .gamma = PIOLED_GAMMA - } - } - }, { - .name = "rpi-display", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9341", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "s6d02a1", - .spi = &(struct spi_board_info) { - .modalias = "fb_s6d02a1", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "sainsmart18", - .spi = &(struct spi_board_info) { - .modalias = "fb_st7735r", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "sainsmart32", - .pdev = &(struct platform_device) { - .name = "fb_ssd1289", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 16, - .txbuflen = -2, /* disable buffer */ - .backlight = 1, - .fbtftops.write = write_gpio16_wr_slow, - }, - .bgr = true, - }, - }, - } - }, { - .name = "sainsmart32_fast", - .pdev = &(struct platform_device) { - .name = "fb_ssd1289", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 16, - .txbuflen = -2, /* disable buffer */ - .backlight = 1, - }, - .bgr = true, - }, - }, - } - }, { - .name = "sainsmart32_latched", - .pdev = &(struct platform_device) { - .name = "fb_ssd1289", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 16, - .txbuflen = -2, /* disable buffer */ - .backlight = 1, - .fbtftops.write = - fbtft_write_gpio16_wr_latched, - }, - .bgr = true, - }, - }, - } - }, { - .name = "sainsmart32_spi", - .spi = &(struct spi_board_info) { - .modalias = "fb_ssd1289", - .max_speed_hz = 16000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "spidev", - .spi = &(struct spi_board_info) { - .modalias = "spidev", - .max_speed_hz = 500000, - .bus_num = 0, - .chip_select = 0, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - } - } - }, { - .name = "ssd1331", - .spi = &(struct spi_board_info) { - .modalias = "fb_ssd1331", - .max_speed_hz = 20000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "tinylcd35", - .spi = &(struct spi_board_info) { - .modalias = "fb_tinylcd", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - 
} - } - }, { - .name = "tm022hdh26", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9341", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "tontec35_9481", /* boards before 02 July 2014 */ - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9481", - .max_speed_hz = 128000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "tontec35_9486", /* boards after 02 July 2014 */ - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9486", - .max_speed_hz = 128000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - }, - .bgr = true, - } - } - }, { - .name = "upd161704", - .spi = &(struct spi_board_info) { - .modalias = "fb_upd161704", - .max_speed_hz = 32000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - .name = "waveshare32b", - .spi = &(struct spi_board_info) { - .modalias = "fb_ili9340", - .max_speed_hz = 48000000, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - .backlight = 1, - .init_sequence = - waveshare32b_init_sequence, - }, - .bgr = true, - } - } - }, { - .name = "waveshare22", - .spi = &(struct spi_board_info) { - .modalias = "fb_bd663474", - .max_speed_hz = 32000000, - .mode = SPI_MODE_3, - .platform_data = &(struct fbtft_platform_data) { - .display = { - .buswidth = 8, - }, - } - } - }, { - /* This should be the last item. - * Used with the custom argument - */ - .name = "", - .spi = &(struct spi_board_info) { - .modalias = "", - .max_speed_hz = 0, - .mode = SPI_MODE_0, - .platform_data = &(struct fbtft_platform_data) { - } - }, - .pdev = &(struct platform_device) { - .name = "", - .id = 0, - .dev = { - .release = fbtft_device_pdev_release, - .platform_data = &(struct fbtft_platform_data) { - }, - }, - }, - } -}; - -static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len) -{ - u16 data; - int i; -#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO - static u16 prev_data; -#endif - - fbtft_par_dbg_hex(DEBUG_WRITE, par, par->info->device, u8, buf, len, - "%s(len=%zu): ", __func__, len); - - while (len) { - data = *(u16 *)buf; - - /* Start writing by pulling down /WR */ - gpiod_set_value(par->gpio.wr, 0); - - /* Set data */ -#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO - if (data == prev_data) { - gpiod_set_value(par->gpio.wr, 0); /* used as delay */ - } else { - for (i = 0; i < 16; i++) { - if ((data & 1) != (prev_data & 1)) - gpiod_set_value(par->gpio.db[i], - data & 1); - data >>= 1; - prev_data >>= 1; - } - } -#else - for (i = 0; i < 16; i++) { - gpiod_set_value(par->gpio.db[i], data & 1); - data >>= 1; - } -#endif - - /* Pullup /WR */ - gpiod_set_value(par->gpio.wr, 1); - -#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO - prev_data = *(u16 *)buf; -#endif - buf += 2; - len -= 2; - } - - return 0; -} - -static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par, - int xs, int ys, int xe, int ye) -{ - write_reg(par, 0x2A, 0, xs + 2, 0, xe + 2); - write_reg(par, 0x2B, 0, ys + 1, 0, ye + 1); - write_reg(par, 0x2C); -} - -static void fbtft_device_pdev_release(struct device *dev) -{ -/* Needed to silence this message: - * Device 'xxx' does not have a release() function, - * it is broken 
and must be fixed - */ -} - -static int spi_device_found(struct device *dev, void *data) -{ - struct spi_device *spi = to_spi_device(dev); - - dev_info(dev, "%s %s %dkHz %d bits mode=0x%02X\n", spi->modalias, - dev_name(dev), spi->max_speed_hz / 1000, spi->bits_per_word, - spi->mode); - - return 0; -} - -static void pr_spi_devices(void) -{ - pr_debug("SPI devices registered:\n"); - bus_for_each_dev(&spi_bus_type, NULL, NULL, spi_device_found); -} - -static int p_device_found(struct device *dev, void *data) -{ - struct platform_device - *pdev = to_platform_device(dev); - - if (strstr(pdev->name, "fb")) - dev_info(dev, "%s id=%d pdata? %s\n", pdev->name, pdev->id, - pdev->dev.platform_data ? "yes" : "no"); - - return 0; -} - -static void pr_p_devices(void) -{ - pr_debug("'fb' Platform devices registered:\n"); - bus_for_each_dev(&platform_bus_type, NULL, NULL, p_device_found); -} - -#ifdef MODULE -static void fbtft_device_spi_delete(struct spi_master *master, unsigned int cs) -{ - struct device *dev; - char str[32]; - - snprintf(str, sizeof(str), "%s.%u", dev_name(&master->dev), cs); - - dev = bus_find_device_by_name(&spi_bus_type, NULL, str); - if (dev) { - if (verbose) - dev_info(dev, "Deleting %s\n", str); - device_del(dev); - } -} - -static int fbtft_device_spi_device_register(struct spi_board_info *spi) -{ - struct spi_master *master; - - master = spi_busnum_to_master(spi->bus_num); - if (!master) { - pr_err("spi_busnum_to_master(%d) returned NULL\n", - spi->bus_num); - return -EINVAL; - } - /* make sure it's available */ - fbtft_device_spi_delete(master, spi->chip_select); - spi_device = spi_new_device(master, spi); - put_device(&master->dev); - if (!spi_device) { - dev_err(&master->dev, "spi_new_device() returned NULL\n"); - return -EPERM; - } - return 0; -} -#else -static int fbtft_device_spi_device_register(struct spi_board_info *spi) -{ - return spi_register_board_info(spi, 1); -} -#endif - -static int __init fbtft_device_init(void) -{ - struct spi_board_info *spi = NULL; - struct fbtft_platform_data *pdata; - bool found = false; - int i = 0; - int ret = 0; - - if (!name) { -#ifdef MODULE - pr_err("missing module parameter: 'name'\n"); - return -EINVAL; -#else - return 0; -#endif - } - - if (init_num > FBTFT_MAX_INIT_SEQUENCE) { - pr_err("init parameter: exceeded max array size: %d\n", - FBTFT_MAX_INIT_SEQUENCE); - return -EINVAL; - } - - if (verbose > 2) { - pr_spi_devices(); /* print list of registered SPI devices */ - pr_p_devices(); /* print list of 'fb' platform devices */ - } - - pr_debug("name='%s', busnum=%d, cs=%d\n", name, busnum, cs); - - if (rotate > 0 && rotate < 4) { - rotate = (4 - rotate) * 90; - pr_warn("argument 'rotate' should be an angle. Values 1-3 is deprecated. Setting it to %d.\n", - rotate); - } - if (rotate != 0 && rotate != 90 && rotate != 180 && rotate != 270) { - pr_warn("argument 'rotate' illegal value: %d. 
Setting it to 0.\n", - rotate); - rotate = 0; - } - - /* name=list lists all supported displays */ - if (strcmp(name, "list") == 0) { - pr_info("Supported displays:\n"); - - for (i = 0; i < ARRAY_SIZE(displays); i++) - pr_info("%s\n", displays[i].name); - return -ECANCELED; - } - - if (custom) { - i = ARRAY_SIZE(displays) - 1; - displays[i].name = name; - if (speed == 0) { - displays[i].pdev->name = name; - displays[i].spi = NULL; - } else { - size_t len; - - len = strlcpy(displays[i].spi->modalias, name, - SPI_NAME_SIZE); - if (len >= SPI_NAME_SIZE) - pr_warn("modalias (name) truncated to: %s\n", - displays[i].spi->modalias); - displays[i].pdev = NULL; - } - } - - for (i = 0; i < ARRAY_SIZE(displays); i++) { - if (strncmp(name, displays[i].name, SPI_NAME_SIZE) == 0) { - if (displays[i].spi) { - spi = displays[i].spi; - spi->chip_select = cs; - spi->bus_num = busnum; - if (speed) - spi->max_speed_hz = speed; - if (mode != -1) - spi->mode = mode; - pdata = (void *)spi->platform_data; - } else if (displays[i].pdev) { - p_device = displays[i].pdev; - pdata = p_device->dev.platform_data; - } else { - pr_err("broken displays array\n"); - return -EINVAL; - } - - pdata->rotate = rotate; - if (bgr == 0) - pdata->bgr = false; - else if (bgr == 1) - pdata->bgr = true; - if (startbyte) - pdata->startbyte = startbyte; - if (gamma) - pdata->gamma = gamma; - pdata->display.debug = debug; - if (fps) - pdata->fps = fps; - if (txbuflen) - pdata->txbuflen = txbuflen; - if (init_num) - pdata->display.init_sequence = init; - if (custom) { - pdata->display.width = width; - pdata->display.height = height; - pdata->display.buswidth = buswidth; - pdata->display.backlight = 1; - } - - if (displays[i].spi) { - ret = fbtft_device_spi_device_register(spi); - if (ret) { - pr_err("failed to register SPI device\n"); - return ret; - } - } else { - ret = platform_device_register(p_device); - if (ret < 0) { - pr_err("platform_device_register() returned %d\n", - ret); - return ret; - } - } - found = true; - break; - } - } - - if (!found) { - pr_err("display not supported: '%s'\n", name); - return -EINVAL; - } - - if (spi_device && (verbose > 1)) - pr_spi_devices(); - if (p_device && (verbose > 1)) - pr_p_devices(); - - return 0; -} - -static void __exit fbtft_device_exit(void) -{ - if (spi_device) { - device_del(&spi_device->dev); - kfree(spi_device); - } - - if (p_device) - platform_device_unregister(p_device); -} - -arch_initcall(fbtft_device_init); -module_exit(fbtft_device_exit); - -MODULE_DESCRIPTION("Add a FBTFT device."); -MODULE_AUTHOR("Noralf Tronnes"); -MODULE_LICENSE("GPL"); diff --git a/drivers/staging/fbtft/flexfb.c b/drivers/staging/fbtft/flexfb.c deleted file mode 100644 index 3747321011fa..000000000000 --- a/drivers/staging/fbtft/flexfb.c +++ /dev/null @@ -1,851 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Generic FB driver for TFT LCD displays - * - * Copyright (C) 2013 Noralf Tronnes - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/vmalloc.h> -#include <linux/gpio/consumer.h> -#include <linux/spi/spi.h> -#include <linux/delay.h> - -#include "fbtft.h" - -#define DRVNAME "flexfb" - -static char *chip; -module_param(chip, charp, 0000); -MODULE_PARM_DESC(chip, "LCD controller"); - -static unsigned int width; -module_param(width, uint, 0000); -MODULE_PARM_DESC(width, "Display width"); - -static unsigned int height; -module_param(height, uint, 0000); -MODULE_PARM_DESC(height, "Display height"); - -static s16 init[512]; -static int init_num; 
-module_param_array(init, short, &init_num, 0000); -MODULE_PARM_DESC(init, "Init sequence"); - -static unsigned int setaddrwin; -module_param(setaddrwin, uint, 0000); -MODULE_PARM_DESC(setaddrwin, "Which set_addr_win() implementation to use"); - -static unsigned int buswidth = 8; -module_param(buswidth, uint, 0000); -MODULE_PARM_DESC(buswidth, "Width of databus (default: 8)"); - -static unsigned int regwidth = 8; -module_param(regwidth, uint, 0000); -MODULE_PARM_DESC(regwidth, "Width of controller register (default: 8)"); - -static bool nobacklight; -module_param(nobacklight, bool, 0000); -MODULE_PARM_DESC(nobacklight, "Turn off backlight functionality."); - -static bool latched; -module_param(latched, bool, 0000); -MODULE_PARM_DESC(latched, "Use with latched 16-bit databus"); - -static const s16 *initp; -static int initp_num; - -/* default init sequences */ -static const s16 st7735r_init[] = { - -1, 0x01, - -2, 150, - -1, 0x11, - -2, 500, - -1, 0xB1, 0x01, 0x2C, 0x2D, - -1, 0xB2, 0x01, 0x2C, 0x2D, - -1, 0xB3, 0x01, 0x2C, 0x2D, 0x01, 0x2C, 0x2D, - -1, 0xB4, 0x07, - -1, 0xC0, 0xA2, 0x02, 0x84, - -1, 0xC1, 0xC5, - -1, 0xC2, 0x0A, 0x00, - -1, 0xC3, 0x8A, 0x2A, - -1, 0xC4, 0x8A, 0xEE, - -1, 0xC5, 0x0E, - -1, 0x20, - -1, 0x36, 0xC0, - -1, 0x3A, 0x05, - -1, 0xE0, 0x0f, 0x1a, 0x0f, 0x18, 0x2f, 0x28, 0x20, 0x22, - 0x1f, 0x1b, 0x23, 0x37, 0x00, 0x07, 0x02, 0x10, - -1, 0xE1, 0x0f, 0x1b, 0x0f, 0x17, 0x33, 0x2c, 0x29, 0x2e, - 0x30, 0x30, 0x39, 0x3f, 0x00, 0x07, 0x03, 0x10, - -1, 0x29, - -2, 100, - -1, 0x13, - -2, 10, - -3 -}; - -static const s16 ssd1289_init[] = { - -1, 0x00, 0x0001, - -1, 0x03, 0xA8A4, - -1, 0x0C, 0x0000, - -1, 0x0D, 0x080C, - -1, 0x0E, 0x2B00, - -1, 0x1E, 0x00B7, - -1, 0x01, 0x2B3F, - -1, 0x02, 0x0600, - -1, 0x10, 0x0000, - -1, 0x11, 0x6070, - -1, 0x05, 0x0000, - -1, 0x06, 0x0000, - -1, 0x16, 0xEF1C, - -1, 0x17, 0x0003, - -1, 0x07, 0x0233, - -1, 0x0B, 0x0000, - -1, 0x0F, 0x0000, - -1, 0x41, 0x0000, - -1, 0x42, 0x0000, - -1, 0x48, 0x0000, - -1, 0x49, 0x013F, - -1, 0x4A, 0x0000, - -1, 0x4B, 0x0000, - -1, 0x44, 0xEF00, - -1, 0x45, 0x0000, - -1, 0x46, 0x013F, - -1, 0x30, 0x0707, - -1, 0x31, 0x0204, - -1, 0x32, 0x0204, - -1, 0x33, 0x0502, - -1, 0x34, 0x0507, - -1, 0x35, 0x0204, - -1, 0x36, 0x0204, - -1, 0x37, 0x0502, - -1, 0x3A, 0x0302, - -1, 0x3B, 0x0302, - -1, 0x23, 0x0000, - -1, 0x24, 0x0000, - -1, 0x25, 0x8000, - -1, 0x4f, 0x0000, - -1, 0x4e, 0x0000, - -1, 0x22, - -3 -}; - -static const s16 hx8340bn_init[] = { - -1, 0xC1, 0xFF, 0x83, 0x40, - -1, 0x11, - -2, 150, - -1, 0xCA, 0x70, 0x00, 0xD9, - -1, 0xB0, 0x01, 0x11, - -1, 0xC9, 0x90, 0x49, 0x10, 0x28, 0x28, 0x10, 0x00, 0x06, - -2, 20, - -1, 0xC2, 0x60, 0x71, 0x01, 0x0E, 0x05, 0x02, 0x09, 0x31, 0x0A, - -1, 0xC3, 0x67, 0x30, 0x61, 0x17, 0x48, 0x07, 0x05, 0x33, - -2, 10, - -1, 0xB5, 0x35, 0x20, 0x45, - -1, 0xB4, 0x33, 0x25, 0x4C, - -2, 10, - -1, 0x3A, 0x05, - -1, 0x29, - -2, 10, - -3 -}; - -static const s16 ili9225_init[] = { - -1, 0x0001, 0x011C, - -1, 0x0002, 0x0100, - -1, 0x0003, 0x1030, - -1, 0x0008, 0x0808, - -1, 0x000C, 0x0000, - -1, 0x000F, 0x0A01, - -1, 0x0020, 0x0000, - -1, 0x0021, 0x0000, - -2, 50, - -1, 0x0010, 0x0A00, - -1, 0x0011, 0x1038, - -2, 50, - -1, 0x0012, 0x1121, - -1, 0x0013, 0x004E, - -1, 0x0014, 0x676F, - -1, 0x0030, 0x0000, - -1, 0x0031, 0x00DB, - -1, 0x0032, 0x0000, - -1, 0x0033, 0x0000, - -1, 0x0034, 0x00DB, - -1, 0x0035, 0x0000, - -1, 0x0036, 0x00AF, - -1, 0x0037, 0x0000, - -1, 0x0038, 0x00DB, - -1, 0x0039, 0x0000, - -1, 0x0050, 0x0000, - -1, 0x0051, 0x060A, - -1, 0x0052, 0x0D0A, - -1, 0x0053, 0x0303, - -1, 
0x0054, 0x0A0D, - -1, 0x0055, 0x0A06, - -1, 0x0056, 0x0000, - -1, 0x0057, 0x0303, - -1, 0x0058, 0x0000, - -1, 0x0059, 0x0000, - -2, 50, - -1, 0x0007, 0x1017, - -2, 50, - -3 -}; - -static const s16 ili9320_init[] = { - -1, 0x00E5, 0x8000, - -1, 0x0000, 0x0001, - -1, 0x0001, 0x0100, - -1, 0x0002, 0x0700, - -1, 0x0003, 0x1030, - -1, 0x0004, 0x0000, - -1, 0x0008, 0x0202, - -1, 0x0009, 0x0000, - -1, 0x000A, 0x0000, - -1, 0x000C, 0x0000, - -1, 0x000D, 0x0000, - -1, 0x000F, 0x0000, - -1, 0x0010, 0x0000, - -1, 0x0011, 0x0007, - -1, 0x0012, 0x0000, - -1, 0x0013, 0x0000, - -2, 200, - -1, 0x0010, 0x17B0, - -1, 0x0011, 0x0031, - -2, 50, - -1, 0x0012, 0x0138, - -2, 50, - -1, 0x0013, 0x1800, - -1, 0x0029, 0x0008, - -2, 50, - -1, 0x0020, 0x0000, - -1, 0x0021, 0x0000, - -1, 0x0030, 0x0000, - -1, 0x0031, 0x0505, - -1, 0x0032, 0x0004, - -1, 0x0035, 0x0006, - -1, 0x0036, 0x0707, - -1, 0x0037, 0x0105, - -1, 0x0038, 0x0002, - -1, 0x0039, 0x0707, - -1, 0x003C, 0x0704, - -1, 0x003D, 0x0807, - -1, 0x0050, 0x0000, - -1, 0x0051, 0x00EF, - -1, 0x0052, 0x0000, - -1, 0x0053, 0x013F, - -1, 0x0060, 0x2700, - -1, 0x0061, 0x0001, - -1, 0x006A, 0x0000, - -1, 0x0080, 0x0000, - -1, 0x0081, 0x0000, - -1, 0x0082, 0x0000, - -1, 0x0083, 0x0000, - -1, 0x0084, 0x0000, - -1, 0x0085, 0x0000, - -1, 0x0090, 0x0010, - -1, 0x0092, 0x0000, - -1, 0x0093, 0x0003, - -1, 0x0095, 0x0110, - -1, 0x0097, 0x0000, - -1, 0x0098, 0x0000, - -1, 0x0007, 0x0173, - -3 -}; - -static const s16 ili9325_init[] = { - -1, 0x00E3, 0x3008, - -1, 0x00E7, 0x0012, - -1, 0x00EF, 0x1231, - -1, 0x0001, 0x0100, - -1, 0x0002, 0x0700, - -1, 0x0003, 0x1030, - -1, 0x0004, 0x0000, - -1, 0x0008, 0x0207, - -1, 0x0009, 0x0000, - -1, 0x000A, 0x0000, - -1, 0x000C, 0x0000, - -1, 0x000D, 0x0000, - -1, 0x000F, 0x0000, - -1, 0x0010, 0x0000, - -1, 0x0011, 0x0007, - -1, 0x0012, 0x0000, - -1, 0x0013, 0x0000, - -2, 200, - -1, 0x0010, 0x1690, - -1, 0x0011, 0x0223, - -2, 50, - -1, 0x0012, 0x000D, - -2, 50, - -1, 0x0013, 0x1200, - -1, 0x0029, 0x000A, - -1, 0x002B, 0x000C, - -2, 50, - -1, 0x0020, 0x0000, - -1, 0x0021, 0x0000, - -1, 0x0030, 0x0000, - -1, 0x0031, 0x0506, - -1, 0x0032, 0x0104, - -1, 0x0035, 0x0207, - -1, 0x0036, 0x000F, - -1, 0x0037, 0x0306, - -1, 0x0038, 0x0102, - -1, 0x0039, 0x0707, - -1, 0x003C, 0x0702, - -1, 0x003D, 0x1604, - -1, 0x0050, 0x0000, - -1, 0x0051, 0x00EF, - -1, 0x0052, 0x0000, - -1, 0x0053, 0x013F, - -1, 0x0060, 0xA700, - -1, 0x0061, 0x0001, - -1, 0x006A, 0x0000, - -1, 0x0080, 0x0000, - -1, 0x0081, 0x0000, - -1, 0x0082, 0x0000, - -1, 0x0083, 0x0000, - -1, 0x0084, 0x0000, - -1, 0x0085, 0x0000, - -1, 0x0090, 0x0010, - -1, 0x0092, 0x0600, - -1, 0x0007, 0x0133, - -3 -}; - -static const s16 ili9341_init[] = { - -1, 0x28, - -2, 20, - -1, 0xCF, 0x00, 0x83, 0x30, - -1, 0xED, 0x64, 0x03, 0x12, 0x81, - -1, 0xE8, 0x85, 0x01, 0x79, - -1, 0xCB, 0x39, 0x2c, 0x00, 0x34, 0x02, - -1, 0xF7, 0x20, - -1, 0xEA, 0x00, 0x00, - -1, 0xC0, 0x26, - -1, 0xC1, 0x11, - -1, 0xC5, 0x35, 0x3E, - -1, 0xC7, 0xBE, - -1, 0xB1, 0x00, 0x1B, - -1, 0xB6, 0x0a, 0x82, 0x27, 0x00, - -1, 0xB7, 0x07, - -1, 0x3A, 0x55, - -1, 0x36, 0x48, - -1, 0x11, - -2, 120, - -1, 0x29, - -2, 20, - -3 -}; - -static const s16 ssd1351_init[] = { - -1, 0xfd, 0x12, - -1, 0xfd, 0xb1, - -1, 0xae, - -1, 0xb3, 0xf1, - -1, 0xca, 0x7f, - -1, 0xa0, 0x74, - -1, 0x15, 0x00, 0x7f, - -1, 0x75, 0x00, 0x7f, - -1, 0xa1, 0x00, - -1, 0xa2, 0x00, - -1, 0xb5, 0x00, - -1, 0xab, 0x01, - -1, 0xb1, 0x32, - -1, 0xb4, 0xa0, 0xb5, 0x55, - -1, 0xbb, 0x17, - -1, 0xbe, 0x05, - -1, 0xc1, 0xc8, 0x80, 0xc8, - -1, 0xc7, 0x0f, - -1, 0xb6, 0x01, - -1, 0xa6, - 
-1, 0xaf, - -3 -}; - -/** - * struct flexfb_lcd_controller - Describes the LCD controller properties - * @name: Model name of the chip - * @width: Width of display in pixels - * @height: Height of display in pixels - * @setaddrwin: Which set_addr_win() implementation to use - * @regwidth: LCD Controller Register width in bits - * @init_seq: LCD initialization sequence - * @init_seq_sz: Size of LCD initialization sequence - */ -struct flexfb_lcd_controller { - const char *name; - unsigned int width; - unsigned int height; - unsigned int setaddrwin; - unsigned int regwidth; - const s16 *init_seq; - int init_seq_sz; -}; - -static const struct flexfb_lcd_controller flexfb_chip_table[] = { - { - .name = "st7735r", - .width = 120, - .height = 160, - .init_seq = st7735r_init, - .init_seq_sz = ARRAY_SIZE(st7735r_init), - }, - { - .name = "hx8340bn", - .width = 176, - .height = 220, - .init_seq = hx8340bn_init, - .init_seq_sz = ARRAY_SIZE(hx8340bn_init), - }, - { - .name = "ili9225", - .width = 176, - .height = 220, - .regwidth = 16, - .init_seq = ili9225_init, - .init_seq_sz = ARRAY_SIZE(ili9225_init), - }, - { - .name = "ili9320", - .width = 240, - .height = 320, - .setaddrwin = 1, - .regwidth = 16, - .init_seq = ili9320_init, - .init_seq_sz = ARRAY_SIZE(ili9320_init), - }, - { - .name = "ili9325", - .width = 240, - .height = 320, - .setaddrwin = 1, - .regwidth = 16, - .init_seq = ili9325_init, - .init_seq_sz = ARRAY_SIZE(ili9325_init), - }, - { - .name = "ili9341", - .width = 240, - .height = 320, - .init_seq = ili9341_init, - .init_seq_sz = ARRAY_SIZE(ili9341_init), - }, - { - .name = "ssd1289", - .width = 240, - .height = 320, - .setaddrwin = 2, - .regwidth = 16, - .init_seq = ssd1289_init, - .init_seq_sz = ARRAY_SIZE(ssd1289_init), - }, - { - .name = "ssd1351", - .width = 128, - .height = 128, - .setaddrwin = 3, - .init_seq = ssd1351_init, - .init_seq_sz = ARRAY_SIZE(ssd1351_init), - }, -}; - -/* ili9320, ili9325 */ -static void flexfb_set_addr_win_1(struct fbtft_par *par, - int xs, int ys, int xe, int ye) -{ - switch (par->info->var.rotate) { - /* R20h = Horizontal GRAM Start Address */ - /* R21h = Vertical GRAM Start Address */ - case 0: - write_reg(par, 0x0020, xs); - write_reg(par, 0x0021, ys); - break; - case 180: - write_reg(par, 0x0020, width - 1 - xs); - write_reg(par, 0x0021, height - 1 - ys); - break; - case 270: - write_reg(par, 0x0020, width - 1 - ys); - write_reg(par, 0x0021, xs); - break; - case 90: - write_reg(par, 0x0020, ys); - write_reg(par, 0x0021, height - 1 - xs); - break; - } - write_reg(par, 0x0022); /* Write Data to GRAM */ -} - -/* ssd1289 */ -static void flexfb_set_addr_win_2(struct fbtft_par *par, - int xs, int ys, int xe, int ye) -{ - switch (par->info->var.rotate) { - /* R4Eh - Set GDDRAM X address counter */ - /* R4Fh - Set GDDRAM Y address counter */ - case 0: - write_reg(par, 0x4e, xs); - write_reg(par, 0x4f, ys); - break; - case 180: - write_reg(par, 0x4e, par->info->var.xres - 1 - xs); - write_reg(par, 0x4f, par->info->var.yres - 1 - ys); - break; - case 270: - write_reg(par, 0x4e, par->info->var.yres - 1 - ys); - write_reg(par, 0x4f, xs); - break; - case 90: - write_reg(par, 0x4e, ys); - write_reg(par, 0x4f, par->info->var.xres - 1 - xs); - break; - } - - /* R22h - RAM data write */ - write_reg(par, 0x22, 0); -} - -/* ssd1351 */ -static void set_addr_win_3(struct fbtft_par *par, - int xs, int ys, int xe, int ye) -{ - write_reg(par, 0x15, xs, xe); - write_reg(par, 0x75, ys, ye); - write_reg(par, 0x5C); -} - -static int flexfb_verify_gpios_dc(struct fbtft_par 
*par) -{ - fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__); - - if (!par->gpio.dc) { - dev_err(par->info->device, - "Missing info about 'dc' gpio. Aborting.\n"); - return -EINVAL; - } - - return 0; -} - -static int flexfb_verify_gpios_db(struct fbtft_par *par) -{ - int i; - int num_db = buswidth; - - fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__); - - if (!par->gpio.dc) { - dev_err(par->info->device, "Missing info about 'dc' gpio. Aborting.\n"); - return -EINVAL; - } - if (!par->gpio.wr) { - dev_err(par->info->device, "Missing info about 'wr' gpio. Aborting.\n"); - return -EINVAL; - } - if (latched && !par->gpio.latch) { - dev_err(par->info->device, "Missing info about 'latch' gpio. Aborting.\n"); - return -EINVAL; - } - if (latched) - num_db = buswidth / 2; - for (i = 0; i < num_db; i++) { - if (!par->gpio.db[i]) { - dev_err(par->info->device, - "Missing info about 'db%02d' gpio. Aborting.\n", - i); - return -EINVAL; - } - } - - return 0; -} - -static void flexfb_chip_load_param(const struct flexfb_lcd_controller *chip) -{ - if (!width) - width = chip->width; - if (!height) - height = chip->height; - setaddrwin = chip->setaddrwin; - if (chip->regwidth) - regwidth = chip->regwidth; - if (!init_num) { - initp = chip->init_seq; - initp_num = chip->init_seq_sz; - } -} - -static struct fbtft_display flex_display = { }; - -static int flexfb_chip_init(const struct device *dev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(flexfb_chip_table); i++) - if (!strcmp(chip, flexfb_chip_table[i].name)) { - flexfb_chip_load_param(&flexfb_chip_table[i]); - return 0; - } - - dev_err(dev, "chip=%s is not supported\n", chip); - - return -EINVAL; -} - -static int flexfb_probe_common(struct spi_device *sdev, - struct platform_device *pdev) -{ - struct device *dev; - struct fb_info *info; - struct fbtft_par *par; - int ret; - - initp = init; - initp_num = init_num; - - if (sdev) - dev = &sdev->dev; - else - dev = &pdev->dev; - - fbtft_init_dbg(dev, "%s(%s)\n", __func__, - sdev ? "'SPI device'" : "'Platform device'"); - - if (chip) { - ret = flexfb_chip_init(dev); - if (ret) - return ret; - } - - if (width == 0 || height == 0) { - dev_err(dev, "argument(s) missing: width and height has to be set.\n"); - return -EINVAL; - } - flex_display.width = width; - flex_display.height = height; - fbtft_init_dbg(dev, "Display resolution: %dx%d\n", width, height); - fbtft_init_dbg(dev, "chip = %s\n", chip ? 
chip : "not set"); - fbtft_init_dbg(dev, "setaddrwin = %d\n", setaddrwin); - fbtft_init_dbg(dev, "regwidth = %d\n", regwidth); - fbtft_init_dbg(dev, "buswidth = %d\n", buswidth); - - info = fbtft_framebuffer_alloc(&flex_display, dev, dev->platform_data); - if (!info) - return -ENOMEM; - - par = info->par; - if (sdev) - par->spi = sdev; - else - par->pdev = pdev; - if (!par->init_sequence) - par->init_sequence = initp; - par->fbtftops.init_display = fbtft_init_display; - - /* registerwrite functions */ - switch (regwidth) { - case 8: - par->fbtftops.write_register = fbtft_write_reg8_bus8; - break; - case 16: - par->fbtftops.write_register = fbtft_write_reg16_bus8; - break; - default: - dev_err(dev, - "argument 'regwidth': %d is not supported.\n", - regwidth); - return -EINVAL; - } - - /* bus functions */ - if (sdev) { - par->fbtftops.write = fbtft_write_spi; - switch (buswidth) { - case 8: - par->fbtftops.write_vmem = fbtft_write_vmem16_bus8; - if (!par->startbyte) - par->fbtftops.verify_gpios = flexfb_verify_gpios_dc; - break; - case 9: - if (regwidth == 16) { - dev_err(dev, "argument 'regwidth': %d is not supported with buswidth=%d and SPI.\n", - regwidth, buswidth); - return -EINVAL; - } - par->fbtftops.write_register = fbtft_write_reg8_bus9; - par->fbtftops.write_vmem = fbtft_write_vmem16_bus9; - if (par->spi->master->bits_per_word_mask - & SPI_BPW_MASK(9)) { - par->spi->bits_per_word = 9; - break; - } - - dev_warn(dev, - "9-bit SPI not available, emulating using 8-bit.\n"); - /* allocate buffer with room for dc bits */ - par->extra = devm_kzalloc(par->info->device, - par->txbuf.len - + (par->txbuf.len / 8) + 8, - GFP_KERNEL); - if (!par->extra) { - ret = -ENOMEM; - goto out_release; - } - par->fbtftops.write = fbtft_write_spi_emulate_9; - - break; - default: - dev_err(dev, - "argument 'buswidth': %d is not supported with SPI.\n", - buswidth); - return -EINVAL; - } - } else { - par->fbtftops.verify_gpios = flexfb_verify_gpios_db; - switch (buswidth) { - case 8: - par->fbtftops.write = fbtft_write_gpio8_wr; - par->fbtftops.write_vmem = fbtft_write_vmem16_bus8; - break; - case 16: - par->fbtftops.write_register = fbtft_write_reg16_bus16; - if (latched) - par->fbtftops.write = fbtft_write_gpio16_wr_latched; - else - par->fbtftops.write = fbtft_write_gpio16_wr; - par->fbtftops.write_vmem = fbtft_write_vmem16_bus16; - break; - default: - dev_err(dev, - "argument 'buswidth': %d is not supported with parallel.\n", - buswidth); - return -EINVAL; - } - } - - /* set_addr_win function */ - switch (setaddrwin) { - case 0: - /* use default */ - break; - case 1: - par->fbtftops.set_addr_win = flexfb_set_addr_win_1; - break; - case 2: - par->fbtftops.set_addr_win = flexfb_set_addr_win_2; - break; - case 3: - par->fbtftops.set_addr_win = set_addr_win_3; - break; - default: - dev_err(dev, "argument 'setaddrwin': unknown value %d.\n", - setaddrwin); - return -EINVAL; - } - - if (!nobacklight) - par->fbtftops.register_backlight = fbtft_register_backlight; - - ret = fbtft_register_framebuffer(info); - if (ret < 0) - goto out_release; - - return 0; - -out_release: - fbtft_framebuffer_release(info); - - return ret; -} - -static int flexfb_remove_common(struct device *dev, struct fb_info *info) -{ - struct fbtft_par *par; - - if (!info) - return -EINVAL; - par = info->par; - if (par) - fbtft_par_dbg(DEBUG_DRIVER_INIT_FUNCTIONS, par, "%s()\n", - __func__); - fbtft_unregister_framebuffer(info); - fbtft_framebuffer_release(info); - - return 0; -} - -static int flexfb_probe_spi(struct spi_device *spi) -{ - return 
flexfb_probe_common(spi, NULL); -} - -static int flexfb_remove_spi(struct spi_device *spi) -{ - struct fb_info *info = spi_get_drvdata(spi); - - return flexfb_remove_common(&spi->dev, info); -} - -static int flexfb_probe_pdev(struct platform_device *pdev) -{ - return flexfb_probe_common(NULL, pdev); -} - -static int flexfb_remove_pdev(struct platform_device *pdev) -{ - struct fb_info *info = platform_get_drvdata(pdev); - - return flexfb_remove_common(&pdev->dev, info); -} - -static struct spi_driver flexfb_spi_driver = { - .driver = { - .name = DRVNAME, - }, - .probe = flexfb_probe_spi, - .remove = flexfb_remove_spi, -}; - -static const struct platform_device_id flexfb_platform_ids[] = { - { "flexpfb", 0 }, - { }, -}; -MODULE_DEVICE_TABLE(platform, flexfb_platform_ids); - -static struct platform_driver flexfb_platform_driver = { - .driver = { - .name = DRVNAME, - }, - .id_table = flexfb_platform_ids, - .probe = flexfb_probe_pdev, - .remove = flexfb_remove_pdev, -}; - -static int __init flexfb_init(void) -{ - int ret, ret2; - - ret = spi_register_driver(&flexfb_spi_driver); - ret2 = platform_driver_register(&flexfb_platform_driver); - if (ret < 0) - return ret; - return ret2; -} - -static void __exit flexfb_exit(void) -{ - spi_unregister_driver(&flexfb_spi_driver); - platform_driver_unregister(&flexfb_platform_driver); -} - -/* ------------------------------------------------------------------------- */ - -module_init(flexfb_init); -module_exit(flexfb_exit); - -MODULE_DESCRIPTION("Generic FB driver for TFT LCD displays"); -MODULE_AUTHOR("Noralf Tronnes"); -MODULE_LICENSE("GPL"); diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index c64728fc21f2..83469061a542 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -261,11 +261,11 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) /* Build the PKO buffer pointer */ hw_buffer.u64 = 0; if (skb_shinfo(skb)->nr_frags == 0) { - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); + hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data); hw_buffer.s.pool = 0; hw_buffer.s.size = skb->len; } else { - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data); + hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data); hw_buffer.s.pool = 0; hw_buffer.s.size = skb_headlen(skb); CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64; @@ -273,11 +273,12 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) skb_frag_t *fs = skb_shinfo(skb)->frags + i; hw_buffer.s.addr = - XKPHYS_TO_PHYS((u64)skb_frag_address(fs)); + XKPHYS_TO_PHYS((uintptr_t)skb_frag_address(fs)); hw_buffer.s.size = skb_frag_size(fs); CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64; } - hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb)); + hw_buffer.s.addr = + XKPHYS_TO_PHYS((uintptr_t)CVM_OCT_SKB_CB(skb)); hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1; pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1; pko_command.s.gather = 1; @@ -349,10 +350,8 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev) */ dst_release(skb_dst(skb)); skb_dst_set(skb, NULL); -#ifdef CONFIG_XFRM - secpath_reset(skb); -#endif - nf_reset(skb); + skb_ext_reset(skb); + nf_reset_ct(skb); #ifdef CONFIG_NET_SCHED skb->tc_index = 0; diff --git a/drivers/staging/octeon/octeon-stubs.h b/drivers/staging/octeon/octeon-stubs.h index a4ac3bfb62a8..b78ce9eaab85 100644 --- a/drivers/staging/octeon/octeon-stubs.h +++ b/drivers/staging/octeon/octeon-stubs.h @@ -1202,7 +1202,7 @@ static inline int cvmx_wqe_get_grp(cvmx_wqe_t *work) 
static inline void *cvmx_phys_to_ptr(uint64_t physical_address) { - return (void *)(physical_address); + return (void *)(uintptr_t)(physical_address); } static inline uint64_t cvmx_ptr_to_phys(void *ptr) diff --git a/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c b/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c index 9ddd51685063..5792f491b59a 100644 --- a/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c +++ b/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c @@ -409,7 +409,7 @@ static int odm_ARFBRefresh_8188E(struct odm_dm_struct *dm_odm, struct odm_ra_inf pRaInfo->PTModeSS = 3; else if (pRaInfo->HighestRate > 0x0b) pRaInfo->PTModeSS = 2; - else if (pRaInfo->HighestRate > 0x0b) + else if (pRaInfo->HighestRate > 0x03) pRaInfo->PTModeSS = 1; else pRaInfo->PTModeSS = 0; diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 664d93a7f90d..4fac9dca798e 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -348,8 +348,10 @@ static struct adapter *rtw_usb_if1_init(struct dvobj_priv *dvobj, } padapter->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL); - if (!padapter->HalData) - DBG_88E("cant not alloc memory for HAL DATA\n"); + if (!padapter->HalData) { + DBG_88E("Failed to allocate memory for HAL data\n"); + goto free_adapter; + } /* step read_chip_version */ rtw_hal_read_chip_version(padapter); diff --git a/drivers/staging/speakup/sysfs-driver-speakup b/drivers/staging/speakup/sysfs-driver-speakup new file mode 100644 index 000000000000..be3f5d6962e9 --- /dev/null +++ b/drivers/staging/speakup/sysfs-driver-speakup @@ -0,0 +1,369 @@ +What: /sys/accessibility/speakup/attrib_bleep +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Beeps the PC speaker when there is an attribute change such as + foreground or background color when using speakup review + commands. One = on, zero = off. + +What: /sys/accessibility/speakup/bell_pos +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This works much like a typewriter bell. If for example 72 is + echoed to bell_pos, it will beep the PC speaker when typing on + a line past character 72. + +What: /sys/accessibility/speakup/bleeps +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This controls whether one hears beeps through the PC speaker + when using speakup's review commands. + TODO: what values does it accept? + +What: /sys/accessibility/speakup/bleep_time +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This controls the duration of the PC speaker beeps speakup + produces. + TODO: What are the units? Jiffies? + +What: /sys/accessibility/speakup/cursor_time +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This controls cursor delay when using arrow keys. When a + connection is very slow, with the default setting, when moving + with the arrows, or backspacing etc. speakup says the incorrect + characters. Set this to a higher value to adjust for the delay + and better synchronisation between cursor position and speech. + +What: /sys/accessibility/speakup/delimiters +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Delimit a word from speakup. 
+ TODO: add more info + +What: /sys/accessibility/speakup/ex_num +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/key_echo +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Controls if speakup speaks keys when they are typed. One = on, + zero = off or don't echo keys. + +What: /sys/accessibility/speakup/keymap +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Speakup keymap remaps keys to Speakup functions. + It uses a binary + format. A special program called genmap is needed to compile a + textual keymap into the binary format which is then loaded into + /sys/accessibility/speakup/keymap. + +What: /sys/accessibility/speakup/no_interrupt +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Controls if typing interrupts output from speakup. With + no_interrupt set to zero, typing on the keyboard will interrupt + speakup if for example + the say screen command is used before the + entire screen is read. + With no_interrupt set to one, if the say + screen command is used, and one then types on the keyboard, + speakup will continue to say the whole screen regardless until + it finishes. + +What: /sys/accessibility/speakup/punc_all +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This is a list of all the punctuation speakup should speak when + punc_level is set to four. + +What: /sys/accessibility/speakup/punc_level +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Controls the level of punctuation spoken as the screen is + displayed, not reviewed. Levels range from zero no punctuation, + to four, all punctuation. One corresponds to punc_some, two + corresponds to punc_most, and three as well as four both + correspond to punc_all. Some hardware synthesizers may have + different levels each corresponding to three and four for + punc_level. Also note that if punc_level is set to zero, and + key_echo is set to one, typed punctuation is still spoken as it + is typed. + +What: /sys/accessibility/speakup/punc_most +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This is a list of all the punctuation speakup should speak when + punc_level is set to two. + +What: /sys/accessibility/speakup/punc_some +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This is a list of all the punctuation speakup should speak when + punc_level is set to one. + +What: /sys/accessibility/speakup/reading_punc +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Almost the same as punc_level, the differences being that + reading_punc controls the level of punctuation when reviewing + the screen with speakup's screen review commands. The other + difference is that reading_punc set to three speaks punc_all, + and reading_punc set to four speaks all punctuation, including + spaces. + +What: /sys/accessibility/speakup/repeats +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: A list of characters speakup repeats. Normally, when there are + more than three characters in a row, speakup + just reads three of + those characters. For example, "......" would be read as dot, + dot, dot. If a . is added to the list of characters in repeats, + "......" would be read as dot, dot, dot, times six. + +What: /sys/accessibility/speakup/say_control +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: If set to one, speakup speaks shift, alt and control when those + keys are pressed. 
If say_control is set to zero, shift, ctrl, + and alt are not spoken when they are pressed. + +What: /sys/accessibility/speakup/say_word_ctl +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/silent +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/spell_delay +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This controls how fast a word is spelled + when speakup's say word + review command is pressed twice quickly to speak the current + word being reviewed. Zero just speaks the letters one after + another, while values one through four + seem to introduce more of + a pause between the spelling of each letter by speakup. + +What: /sys/accessibility/speakup/synth +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the synthesizer driver currently in use. Reading + synth returns the synthesizer driver currently in use. Writing + synth switches to the given synthesizer driver, provided it is + either built into the kernel, or already loaded as a module. + +What: /sys/accessibility/speakup/synth_direct +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Sends whatever is written to synth_direct + directly to the speech synthesizer in use, bypassing speakup. + This could be used to make the synthesizer speak + a string, or to + send control sequences to the synthesizer to change how the + synthesizer behaves. + +What: /sys/accessibility/speakup/version +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Reading version returns the version of speakup, and the version + of the synthesizer driver currently in use. + +What: /sys/accessibility/speakup/i18n/announcements +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This file contains various general announcements, most of which + cannot be categorized. You will find messages such as "You + killed Speakup", "I'm alive", "leaving help", "parked", + "unparked", and others. You will also find the names of the + screen edges and cursor tracking modes here. + +What: /sys/accessibility/speakup/i18n/chartab +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO + +What: /sys/accessibility/speakup/i18n/ctl_keys +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Here, you will find names of control keys. These are used with + Speakup's say_control feature. + +What: /sys/accessibility/speakup/i18n/function_names +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Here, you will find a list of names for Speakup functions. + These are used by the help system. For example, suppose that + you have activated help mode, and you pressed + keypad 3. Speakup + says: "keypad 3 is character, say next." + The message "character, say next" names a Speakup function, and + it comes from this function_names file. + +What: /sys/accessibility/speakup/i18n/states +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This file contains names for key states. + Again, these are part of the help system. For instance, if you + had pressed speakup + keypad 3, you would hear: + "speakup keypad 3 is go to bottom edge." + The speakup key is depressed, so the name of the key state is + speakup. + This part of the message comes from the states collection. 
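Editor's note: the synth and synth_direct attributes documented above are ordinary sysfs files, so user space can drive them with plain open()/write() calls. Below is a minimal illustrative sketch, not part of this patch; the paths come from the documentation above, while the helper function and the choice of the "soft" driver are assumptions made only for the example.

	/*
	 * Illustrative userspace sketch (not part of this patch): select a
	 * synthesizer driver via the synth attribute and push a test string
	 * straight to it through synth_direct, bypassing speakup.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int write_attr(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, val, strlen(val));
		close(fd);
		return n < 0 ? -1 : 0;
	}

	int main(void)
	{
		/* "soft" must be built in or already loaded, as described above. */
		if (write_attr("/sys/accessibility/speakup/synth", "soft"))
			perror("synth");
		/* Hand a string directly to the synthesizer, bypassing speakup. */
		if (write_attr("/sys/accessibility/speakup/synth_direct", "hello\n"))
			perror("synth_direct");
		return 0;
	}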
+ +What: /sys/accessibility/speakup/i18n/characters +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Through this sys entry, Speakup gives you the ability to change + how Speakup pronounces a given character. You could, for + example, change how some punctuation characters are spoken. You + can even change how Speakup will pronounce certain letters. For + further details see '12. Changing the Pronunciation of + Characters' in Speakup User's Guide (file spkguide.txt in + source). + +What: /sys/accessibility/speakup/i18n/colors +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: When you use the "say attributes" function, Speakup says the + name of the foreground and background colors. These names come + from the i18n/colors file. + +What: /sys/accessibility/speakup/i18n/formatted +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This group of messages contains embedded formatting codes, to + specify the type and width of displayed data. If you change + these, you must preserve all of the formatting codes, and they + must appear in the order used by the default messages. + +What: /sys/accessibility/speakup/i18n/key_names +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Again, key_names is used by Speakup's help system. In the + previous example, Speakup said that you pressed "keypad 3." + This name came from the key_names file. + +What: /sys/accessibility/speakup/<synth-name>/ +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: In `/sys/accessibility/speakup` is a directory corresponding to + the synthesizer driver currently in use (E.G) `soft` for the + soft driver. This directory contains files which control the + speech synthesizer itself, + as opposed to controlling the speakup + screen reader. The parameters in this directory have the same + names and functions across all + supported synthesizers. The range + of values for freq, pitch, rate, and vol is the same for all + supported synthesizers, with the given range being internally + mapped by the driver to more or less fit the range of values + supported for a given parameter by the individual synthesizer. + Below is a description of values and parameters for soft + synthesizer, which is currently the most commonly used. + +What: /sys/accessibility/speakup/soft/caps_start +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This is the string that is sent to the synthesizer to cause it + to start speaking uppercase letters. For the soft synthesizer + and most others, this causes the pitch of the voice to rise + above the currently set pitch. + +What: /sys/accessibility/speakup/soft/caps_stop +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This is the string sent to the synthesizer to cause it to stop + speaking uppercase letters. In the case of the soft synthesizer + and most others, this returns the pitch of the voice + down to the + currently set pitch. + +What: /sys/accessibility/speakup/soft/delay_time +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/soft/direct +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Controls if punctuation is spoken by speakup, or by the + synthesizer. + For example, speakup speaks ">" as "greater", while + the espeak synthesizer used by the soft driver speaks "greater + than". Zero lets speakup speak the punctuation. One lets the + synthesizer itself speak punctuation. 
+ +What: /sys/accessibility/speakup/soft/freq +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the frequency of the speech synthesizer. Range is + 0-9. + +What: /sys/accessibility/speakup/soft/full_time +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/soft/jiffy_delta +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: This controls how many jiffys the kernel gives to the + synthesizer. Setting this too high can make a system unstable, + or even crash it. + +What: /sys/accessibility/speakup/soft/pitch +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the pitch of the synthesizer. The range is 0-9. + +What: /sys/accessibility/speakup/soft/punct +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the amount of punctuation spoken by the + synthesizer. The range for the soft driver seems to be 0-2. + TODO: How is this related to speakup's punc_level, or + reading_punc. + +What: /sys/accessibility/speakup/soft/rate +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the rate of the synthesizer. Range is from zero + slowest, to nine fastest. + +What: /sys/accessibility/speakup/soft/tone +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the tone of the speech synthesizer. The range for + the soft driver seems to be 0-2. This seems to make no + difference if using espeak and the espeakup connector. + TODO: does espeakup support different tonalities? + +What: /sys/accessibility/speakup/soft/trigger_time +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: TODO: + +What: /sys/accessibility/speakup/soft/voice +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the voice used by the synthesizer if the + synthesizer can speak in more than one voice. The range for the + soft driver is 0-7. Note that while espeak supports multiple + voices, this parameter will not set the voice when the espeakup + connector is used between speakup and espeak. + +What: /sys/accessibility/speakup/soft/vol +KernelVersion: 2.6 +Contact: speakup@linux-speakup.org +Description: Gets or sets the volume of the speech synthesizer. Range is 0-9, + with zero being the softest, and nine being the loudest. 
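Editor's note: the per-synthesizer directory described above exposes freq, pitch, rate and vol with a common 0-9 range, so adjusting them from user space is a read-modify-write on small text files. The sketch below is again illustrative and not part of this patch; the sysfs paths and the 0-9 range are taken from the entries above, and the helper names are made up for the example. It assumes the soft driver is the one in use.

	/*
	 * Illustrative userspace sketch (not part of this patch): read a soft
	 * synthesizer parameter, clamp a new value to the documented 0-9
	 * range, and write it back.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	static int read_attr(const char *path)
	{
		char buf[16] = "";
		int fd = open(path, O_RDONLY);

		if (fd < 0)
			return -1;
		if (read(fd, buf, sizeof(buf) - 1) < 0)
			buf[0] = '\0';
		close(fd);
		return atoi(buf);
	}

	static int write_attr(const char *path, int val)
	{
		char buf[16];
		int fd = open(path, O_WRONLY);
		int ret;

		if (fd < 0)
			return -1;
		/* freq, pitch, rate and vol all accept 0 (lowest) to 9 (highest). */
		if (val < 0)
			val = 0;
		if (val > 9)
			val = 9;
		snprintf(buf, sizeof(buf), "%d\n", val);
		ret = write(fd, buf, strlen(buf)) < 0 ? -1 : 0;
		close(fd);
		return ret;
	}

	int main(void)
	{
		int rate = read_attr("/sys/accessibility/speakup/soft/rate");

		printf("current rate: %d\n", rate);
		/* Speed the voice up one step and raise the volume to maximum. */
		write_attr("/sys/accessibility/speakup/soft/rate", rate + 1);
		write_attr("/sys/accessibility/speakup/soft/vol", 9);
		return 0;
	}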
+ diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c index bc1eaa3a0773..826016c3431a 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c @@ -12,7 +12,7 @@ static const struct snd_pcm_hardware snd_bcm2835_playback_hw = { .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR), + SNDRV_PCM_INFO_SYNC_APPLPTR), .formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000, .rate_min = 8000, @@ -29,7 +29,7 @@ static const struct snd_pcm_hardware snd_bcm2835_playback_hw = { static const struct snd_pcm_hardware snd_bcm2835_playback_spdif_hw = { .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR), + SNDRV_PCM_INFO_SYNC_APPLPTR), .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000, diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c index 23fba01107b9..c6f9cf1913d2 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c @@ -289,6 +289,7 @@ int bcm2835_audio_stop(struct bcm2835_alsa_stream *alsa_stream) VC_AUDIO_MSG_TYPE_STOP, false); } +/* FIXME: this doesn't seem working as expected for "draining" */ int bcm2835_audio_drain(struct bcm2835_alsa_stream *alsa_stream) { struct vc_audio_msg m = { diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index c6bb4aaf9bd0..082302944c37 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1748,8 +1748,10 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent) priv->hw->max_signal = 100; - if (vnt_init(priv)) + if (vnt_init(priv)) { + device_free_info(priv); return -ENODEV; + } device_print_info(priv); pci_set_drvdata(pcid, priv); diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index e55c79eb6430..98361acd3053 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -968,6 +968,11 @@ static int __init n_hdlc_init(void) } /* end of init_module() */ +#ifdef CONFIG_SPARC +#undef __exitdata +#define __exitdata +#endif + static const char hdlc_unregister_ok[] __exitdata = KERN_INFO "N_HDLC: line discipline unregistered\n"; static const char hdlc_unregister_fail[] __exitdata = diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index c68e2b3a1634..836e736ae188 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -141,7 +141,7 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) serial8250_do_set_mctrl(port, mctrl); - if (!up->gpios) { + if (!mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS)) { /* * Turn off autoRTS if RTS is lowered and restore autoRTS * setting if RTS is raised @@ -456,7 +456,8 @@ static void omap_8250_set_termios(struct uart_port *port, up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF); if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW && - !up->gpios) { + !mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS) && + 
!mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_CTS)) { /* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */ up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; priv->efr |= UART_EFR_CTS; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 4789b5d62f63..67a9eb3f94ce 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1032,6 +1032,7 @@ config SERIAL_SIFIVE_CONSOLE bool "Console on SiFive UART" depends on SERIAL_SIFIVE=y select SERIAL_CORE_CONSOLE + select SERIAL_EARLYCON help Select this option if you would like to use a SiFive UART as the system console. diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c index 68d74f2b5106..a32f0d2afd59 100644 --- a/drivers/tty/serial/fsl_linflexuart.c +++ b/drivers/tty/serial/fsl_linflexuart.c @@ -3,7 +3,7 @@ * Freescale linflexuart serial port driver * * Copyright 2012-2016 Freescale Semiconductor, Inc. - * Copyright 2017-2018 NXP + * Copyright 2017-2019 NXP */ #if defined(CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE) && \ @@ -246,12 +246,14 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id) struct tty_port *port = &sport->state->port; unsigned long flags, status; unsigned char rx; + bool brk; spin_lock_irqsave(&sport->lock, flags); status = readl(sport->membase + UARTSR); while (status & LINFLEXD_UARTSR_RMB) { rx = readb(sport->membase + BDRM); + brk = false; flg = TTY_NORMAL; sport->icount.rx++; @@ -261,8 +263,11 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id) status |= LINFLEXD_UARTSR_SZF; if (status & LINFLEXD_UARTSR_BOF) status |= LINFLEXD_UARTSR_BOF; - if (status & LINFLEXD_UARTSR_FEF) + if (status & LINFLEXD_UARTSR_FEF) { + if (!rx) + brk = true; status |= LINFLEXD_UARTSR_FEF; + } if (status & LINFLEXD_UARTSR_PE) status |= LINFLEXD_UARTSR_PE; } @@ -271,13 +276,15 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id) sport->membase + UARTSR); status = readl(sport->membase + UARTSR); - if (uart_handle_sysrq_char(sport, (unsigned char)rx)) - continue; - + if (brk) { + uart_handle_break(sport); + } else { #ifdef SUPPORT_SYSRQ - sport->sysrq = 0; + if (uart_handle_sysrq_char(sport, (unsigned char)rx)) + continue; #endif - tty_insert_flip_char(port, rx, flg); + tty_insert_flip_char(port, rx, flg); + } } spin_unlock_irqrestore(&sport->lock, flags); diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 3e17bb8a0b16..537896c4d887 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -548,7 +548,7 @@ static void lpuart_flush_buffer(struct uart_port *port) val |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH; lpuart32_write(&sport->port, val, UARTFIFO); } else { - val = readb(sport->port.membase + UARTPFIFO); + val = readb(sport->port.membase + UARTCFIFO); val |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH; writeb(val, sport->port.membase + UARTCFIFO); } diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 87c58f9f6390..5e08f2657b90 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2222,8 +2222,8 @@ static int imx_uart_probe(struct platform_device *pdev) return PTR_ERR(base); rxirq = platform_get_irq(pdev, 0); - txirq = platform_get_irq(pdev, 1); - rtsirq = platform_get_irq(pdev, 2); + txirq = platform_get_irq_optional(pdev, 1); + rtsirq = platform_get_irq_optional(pdev, 2); sport->port.dev = &pdev->dev; sport->port.mapbase = res->start; diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index 03963af77b15..d2d8b3494685 100644 --- 
a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -740,7 +740,7 @@ static int __init owl_uart_init(void) return ret; } -static void __init owl_uart_exit(void) +static void __exit owl_uart_exit(void) { platform_driver_unregister(&owl_uart_platform_driver); uart_unregister_driver(&owl_uart_driver); diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c index c1b0d7662ef9..ff9a27d48bca 100644 --- a/drivers/tty/serial/rda-uart.c +++ b/drivers/tty/serial/rda-uart.c @@ -815,7 +815,7 @@ static int __init rda_uart_init(void) return ret; } -static void __init rda_uart_exit(void) +static void __exit rda_uart_exit(void) { platform_driver_unregister(&rda_uart_platform_driver); uart_unregister_driver(&rda_uart_driver); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6e713be1d4e9..c4a414a46c7f 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1964,8 +1964,10 @@ uart_get_console(struct uart_port *ports, int nr, struct console *co) * console=<name>,io|mmio|mmio16|mmio32|mmio32be|mmio32native,<addr>,<options> * * The optional form + * * earlycon=<name>,0x<addr>,<options> * console=<name>,0x<addr>,<options> + * * is also accepted; the returned @iotype will be UPIO_MEM. * * Returns 0 on success or -EINVAL on failure diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c index d9074303c88e..fb4781292d40 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -66,6 +66,9 @@ EXPORT_SYMBOL_GPL(mctrl_gpio_set); struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios, enum mctrl_gpio_idx gidx) { + if (gpios == NULL) + return NULL; + return gpios->gpio[gidx]; } EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod); diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 4e754a4850e6..22e5d4e13714 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2894,8 +2894,12 @@ static int sci_init_single(struct platform_device *dev, port->mapbase = res->start; sci_port->reg_size = resource_size(res); - for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i) - sci_port->irqs[i] = platform_get_irq(dev, i); + for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i) { + if (i) + sci_port->irqs[i] = platform_get_irq_optional(dev, i); + else + sci_port->irqs[i] = platform_get_irq(dev, i); + } /* The SCI generates several interrupts. They can be muxed together or * connected to different interrupt lines. 
In the muxed case only one diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index b8b912b5a8b9..06e79c11141d 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -897,7 +897,8 @@ static int __init ulite_init(void) static void __exit ulite_exit(void) { platform_driver_unregister(&ulite_platform_driver); - uart_unregister_driver(&ulite_uart_driver); + if (ulite_uart_driver.state) + uart_unregister_driver(&ulite_uart_driver); } module_init(ulite_init); diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index da4563aaaf5c..4e55bc327a54 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1550,7 +1550,6 @@ static int cdns_uart_probe(struct platform_device *pdev) goto err_out_id; } - uartps_major = cdns_uart_uart_driver->tty_driver->major; cdns_uart_data->cdns_uart_driver = cdns_uart_uart_driver; /* @@ -1680,6 +1679,7 @@ static int cdns_uart_probe(struct platform_device *pdev) console_port = NULL; #endif + uartps_major = cdns_uart_uart_driver->tty_driver->major; cdns_uart_data->cts_override = of_property_read_bool(pdev->dev.of_node, "cts-override"); return 0; @@ -1741,6 +1741,12 @@ static int cdns_uart_remove(struct platform_device *pdev) console_port = NULL; #endif + /* If this is last instance major number should be initialized */ + mutex_lock(&bitmap_lock); + if (bitmap_empty(bitmap, MAX_UART_INSTANCES)) + uartps_major = 0; + mutex_unlock(&bitmap_lock); + uart_unregister_driver(cdns_uart_data->cdns_uart_driver); return rc; } diff --git a/drivers/usb/cdns3/cdns3-pci-wrap.c b/drivers/usb/cdns3/cdns3-pci-wrap.c index c41ddb61b857..b0a29efe7d31 100644 --- a/drivers/usb/cdns3/cdns3-pci-wrap.c +++ b/drivers/usb/cdns3/cdns3-pci-wrap.c @@ -159,8 +159,9 @@ static int cdns3_pci_probe(struct pci_dev *pdev, wrap->plat_dev = platform_device_register_full(&plat_info); if (IS_ERR(wrap->plat_dev)) { pci_disable_device(pdev); + err = PTR_ERR(wrap->plat_dev); kfree(wrap); - return PTR_ERR(wrap->plat_dev); + return err; } } diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 06f1e105be4e..1109dc5a4c39 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -160,10 +160,28 @@ static int cdns3_core_init_role(struct cdns3 *cdns) if (ret) goto err; - if (cdns->dr_mode != USB_DR_MODE_OTG) { + /* Initialize idle role to start with */ + ret = cdns3_role_start(cdns, USB_ROLE_NONE); + if (ret) + goto err; + + switch (cdns->dr_mode) { + case USB_DR_MODE_UNKNOWN: + case USB_DR_MODE_OTG: ret = cdns3_hw_role_switch(cdns); if (ret) goto err; + break; + case USB_DR_MODE_PERIPHERAL: + ret = cdns3_role_start(cdns, USB_ROLE_DEVICE); + if (ret) + goto err; + break; + case USB_DR_MODE_HOST: + ret = cdns3_role_start(cdns, USB_ROLE_HOST); + if (ret) + goto err; + break; } return ret; diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c index 44f652e8b5a2..e71240b386b4 100644 --- a/drivers/usb/cdns3/ep0.c +++ b/drivers/usb/cdns3/ep0.c @@ -234,9 +234,11 @@ static int cdns3_req_ep0_set_address(struct cdns3_device *priv_dev, static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev, struct usb_ctrlrequest *ctrl) { + struct cdns3_endpoint *priv_ep; __le16 *response_pkt; u16 usb_status = 0; u32 recip; + u8 index; recip = ctrl->bRequestType & USB_RECIP_MASK; @@ -262,9 +264,13 @@ static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev, case USB_RECIP_INTERFACE: return cdns3_ep0_delegate_req(priv_dev, ctrl); case USB_RECIP_ENDPOINT: - /* check if 
endpoint is stalled */ + index = cdns3_ep_addr_to_index(ctrl->wIndex); + priv_ep = priv_dev->eps[index]; + + /* check if endpoint is stalled or stall is pending */ cdns3_select_ep(priv_dev, ctrl->wIndex); - if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts))) + if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts)) || + (priv_ep->flags & EP_STALL_PENDING)) usb_status = BIT(USB_ENDPOINT_HALT); break; default: @@ -332,7 +338,7 @@ static int cdns3_ep0_feature_handle_device(struct cdns3_device *priv_dev, * for sending status stage. * This time should be less then 3ms. */ - usleep_range(1000, 2000); + mdelay(1); cdns3_set_register_bit(&priv_dev->regs->usb_cmd, USB_CMD_STMODE | USB_STS_TMODE_SEL(tmode - 1)); diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 228cdc4ab886..2ca280f4c054 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2571,6 +2571,7 @@ static int cdns3_gadget_start(struct cdns3 *cdns) switch (max_speed) { case USB_SPEED_FULL: writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf); + writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); break; case USB_SPEED_HIGH: writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf); @@ -2662,6 +2663,13 @@ static int __cdns3_gadget_init(struct cdns3 *cdns) { int ret = 0; + /* Ensure 32-bit DMA Mask in case we switched back from Host mode */ + ret = dma_set_mask_and_coherent(cdns->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(cdns->dev, "Failed to set dma mask: %d\n", ret); + return ret; + } + cdns3_drd_switch_gadget(cdns, 1); pm_runtime_get_sync(cdns->dev); diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 7fea4999d352..fb8bd60c83f4 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -461,10 +461,12 @@ static int usblp_release(struct inode *inode, struct file *file) mutex_lock(&usblp_mutex); usblp->used = 0; - if (usblp->present) { + if (usblp->present) usblp_unlink_urbs(usblp); - usb_autopm_put_interface(usblp->intf); - } else /* finish cleanup from disconnect */ + + usb_autopm_put_interface(usblp->intf); + + if (!usblp->present) /* finish cleanup from disconnect */ usblp_cleanup(usblp); mutex_unlock(&usblp_mutex); return 0; diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index 726100d1ac0d..c946d64142ad 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -139,14 +139,14 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc) struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); int irq; - irq = platform_get_irq_byname(dwc3_pdev, "otg"); + irq = platform_get_irq_byname_optional(dwc3_pdev, "otg"); if (irq > 0) goto out; if (irq == -EPROBE_DEFER) goto out; - irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3"); + irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3"); if (irq > 0) goto out; @@ -157,9 +157,6 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc) if (irq > 0) goto out; - if (irq != -EPROBE_DEFER) - dev_err(dwc->dev, "missing OTG IRQ\n"); - if (!irq) irq = -EINVAL; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8adb59f8e4f1..86dc1db788a9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3264,14 +3264,14 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc) struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); int irq; - irq = platform_get_irq_byname(dwc3_pdev, "peripheral"); + irq = platform_get_irq_byname_optional(dwc3_pdev, "peripheral"); if (irq > 0) goto out; if (irq == -EPROBE_DEFER) goto out; - irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3"); + 
irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3"); if (irq > 0) goto out; @@ -3282,9 +3282,6 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc) if (irq > 0) goto out; - if (irq != -EPROBE_DEFER) - dev_err(dwc->dev, "missing peripheral IRQ\n"); - if (!irq) irq = -EINVAL; diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 8deea8c91e03..5567ed2cddbe 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -16,14 +16,14 @@ static int dwc3_host_get_irq(struct dwc3 *dwc) struct platform_device *dwc3_pdev = to_platform_device(dwc->dev); int irq; - irq = platform_get_irq_byname(dwc3_pdev, "host"); + irq = platform_get_irq_byname_optional(dwc3_pdev, "host"); if (irq > 0) goto out; if (irq == -EPROBE_DEFER) goto out; - irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3"); + irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3"); if (irq > 0) goto out; @@ -34,9 +34,6 @@ static int dwc3_host_get_irq(struct dwc3 *dwc) if (irq > 0) goto out; - if (irq != -EPROBE_DEFER) - dev_err(dwc->dev, "missing host IRQ\n"); - if (!irq) irq = -EINVAL; diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index d7e611645533..d354036ff6c8 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -45,7 +45,7 @@ config USB_AT91 config USB_LPC32XX tristate "LPC32XX USB Peripheral Controller" - depends on ARCH_LPC32XX + depends on ARCH_LPC32XX || COMPILE_TEST depends on I2C select USB_ISP1301 help diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 8414fac74493..3d499d93c083 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -48,6 +48,7 @@ #define DRIVER_VERSION "02 May 2005" #define POWER_BUDGET 500 /* in mA; use 8 for low-power port testing */ +#define POWER_BUDGET_3 900 /* in mA */ static const char driver_name[] = "dummy_hcd"; static const char driver_desc[] = "USB Host+Gadget Emulator"; @@ -2432,7 +2433,7 @@ static int dummy_start_ss(struct dummy_hcd *dum_hcd) dum_hcd->rh_state = DUMMY_RH_RUNNING; dum_hcd->stream_en_ep = 0; INIT_LIST_HEAD(&dum_hcd->urbp_list); - dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET; + dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3; dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING; dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1; #ifdef CONFIG_USB_OTG diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index b3e073fb88c6..2b1f3cc7819b 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -1151,7 +1151,7 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes) u32 *p32, tmp, cbytes; /* Use optimal data transfer method based on source address and size */ - switch (((u32) data) & 0x3) { + switch (((uintptr_t) data) & 0x3) { case 0: /* 32-bit aligned */ p32 = (u32 *) data; cbytes = (bytes & ~0x3); @@ -1252,7 +1252,7 @@ static void udc_stuff_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes) u32 *p32, tmp, cbytes; /* Use optimal data transfer method based on source address and size */ - switch (((u32) data) & 0x3) { + switch (((uintptr_t) data) & 0x3) { case 0: /* 32-bit aligned */ p32 = (u32 *) data; cbytes = (bytes & ~0x3); diff --git a/drivers/usb/host/xhci-ext-caps.c b/drivers/usb/host/xhci-ext-caps.c index f498160df969..3351d07c431f 100644 --- a/drivers/usb/host/xhci-ext-caps.c +++ b/drivers/usb/host/xhci-ext-caps.c @@ -57,6 +57,7 @@ static int xhci_create_intel_xhci_sw_pdev(struct xhci_hcd *xhci, u32 
cap_offset) ret = platform_device_add_properties(pdev, role_switch_props); if (ret) { dev_err(dev, "failed to register device properties\n"); + platform_device_put(pdev); return ret; } } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9741cdeea9d7..85ceb43e3405 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3202,10 +3202,10 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len, if (usb_urb_dir_out(urb)) { len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs, seg->bounce_buf, new_buff_len, enqd_len); - if (len != seg->bounce_len) + if (len != new_buff_len) xhci_warn(xhci, "WARN Wrong bounce buffer write length: %zu != %d\n", - len, seg->bounce_len); + len, new_buff_len); seg->bounce_dma = dma_map_single(dev, seg->bounce_buf, max_pkt, DMA_TO_DEVICE); } else { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 500865975687..517ec3206f6e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1032,7 +1032,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) writel(command, &xhci->op_regs->command); xhci->broken_suspend = 0; if (xhci_handshake(&xhci->op_regs->status, - STS_SAVE, 0, 10 * 1000)) { + STS_SAVE, 0, 20 * 1000)) { /* * AMD SNPS xHC 3.0 occasionally does not clear the * SSS bit of USBSTS and when driver tries to poll @@ -1108,6 +1108,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) hibernated = true; if (!hibernated) { + /* + * Some controllers might lose power during suspend, so wait + * for controller not ready bit to clear, just as in xHC init. + */ + retval = xhci_handshake(&xhci->op_regs->status, + STS_CNR, 0, 10 * 1000 * 1000); + if (retval) { + xhci_warn(xhci, "Controller not ready at resume %d\n", + retval); + spin_unlock_irq(&xhci->lock); + return retval; + } /* step 1: restore register */ xhci_restore_registers(xhci); /* step 2: initialize command ring buffer */ @@ -3083,6 +3095,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, unsigned int ep_index; unsigned long flags; u32 ep_flag; + int err; xhci = hcd_to_xhci(hcd); if (!host_ep->hcpriv) @@ -3142,7 +3155,17 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, xhci_free_command(xhci, cfg_cmd); goto cleanup; } - xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id, ep_index, 0); + + err = xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id, + ep_index, 0); + if (err < 0) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_free_command(xhci, cfg_cmd); + xhci_dbg(xhci, "%s: Failed to queue stop ep command, %d ", + __func__, err); + goto cleanup; + } + xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); @@ -3156,8 +3179,16 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, ctrl_ctx, ep_flag, ep_flag); xhci_endpoint_copy(xhci, cfg_cmd->in_ctx, vdev->out_ctx, ep_index); - xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma, + err = xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma, udev->slot_id, false); + if (err < 0) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_free_command(xhci, cfg_cmd); + xhci_dbg(xhci, "%s: Failed to queue config ep command, %d ", + __func__, err); + goto cleanup; + } + xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); @@ -4674,12 +4705,12 @@ static int xhci_update_timeout_for_endpoint(struct xhci_hcd *xhci, alt_timeout = xhci_call_host_update_timeout_for_endpoint(xhci, udev, desc, state, timeout); - /* If we found we can't enable hub-initiated LPM, or + /* If we found we 
can't enable hub-initiated LPM, and * the U1 or U2 exit latency was too high to allow - * device-initiated LPM as well, just stop searching. + * device-initiated LPM as well, then we will disable LPM + * for this device, so stop searching any further. */ - if (alt_timeout == USB3_LPM_DISABLED || - alt_timeout == USB3_LPM_DEVICE_INITIATED) { + if (alt_timeout == USB3_LPM_DISABLED) { *timeout = alt_timeout; return -E2BIG; } @@ -4790,10 +4821,12 @@ static u16 xhci_calculate_lpm_timeout(struct usb_hcd *hcd, if (intf->dev.driver) { driver = to_usb_driver(intf->dev.driver); if (driver && driver->disable_hub_initiated_lpm) { - dev_dbg(&udev->dev, "Hub-initiated %s disabled " - "at request of driver %s\n", - state_name, driver->name); - return xhci_get_timeout_no_hub_lpm(udev, state); + dev_dbg(&udev->dev, "Hub-initiated %s disabled at request of driver %s\n", + state_name, driver->name); + timeout = xhci_get_timeout_no_hub_lpm(udev, + state); + if (timeout == USB3_LPM_DISABLED) + return timeout; } } @@ -5077,11 +5110,18 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) hcd->has_tt = 1; } else { /* - * Some 3.1 hosts return sbrn 0x30, use xhci supported protocol - * minor revision instead of sbrn. Minor revision is a two digit - * BCD containing minor and sub-minor numbers, only show minor. + * Early xHCI 1.1 spec did not mention USB 3.1 capable hosts + * should return 0x31 for sbrn, or that the minor revision + * is a two digit BCD containig minor and sub-minor numbers. + * This was later clarified in xHCI 1.2. + * + * Some USB 3.1 capable hosts therefore have sbrn 0x30, and + * minor revision set to 0x1 instead of 0x10. */ - minor_rev = xhci->usb3_rhub.min_rev / 0x10; + if (xhci->usb3_rhub.min_rev == 0x1) + minor_rev = 1; + else + minor_rev = xhci->usb3_rhub.min_rev / 0x10; switch (minor_rev) { case 2: @@ -5198,8 +5238,16 @@ static void xhci_clear_tt_buffer_complete(struct usb_hcd *hcd, unsigned int ep_index; unsigned long flags; + /* + * udev might be NULL if tt buffer is cleared during a failed device + * enumeration due to a halted control endpoint. Usb core might + * have allocated a new udev for the next enumeration attempt. + */ + xhci = hcd_to_xhci(hcd); udev = (struct usb_device *)ep->hcpriv; + if (!udev) + return; slot_id = udev->slot_id; ep_index = xhci_get_endpoint_index(&ep->desc); diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index 0a57c2cc8e5a..7a6b122c833f 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -716,6 +716,10 @@ static int mts_usb_probe(struct usb_interface *intf, } + if (ep_in_current != &ep_in_set[2]) { + MTS_WARNING("couldn't find two input bulk endpoints. Bailing out.\n"); + return -ENODEV; + } if ( ep_out == -1 ) { MTS_WARNING( "couldn't find an output bulk endpoint. Bailing out.\n" ); diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index bdae62b2ffe0..9bce583aada3 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -47,16 +47,6 @@ config USB_SEVSEG To compile this driver as a module, choose M here: the module will be called usbsevseg. -config USB_RIO500 - tristate "USB Diamond Rio500 support" - help - Say Y here if you want to connect a USB Rio500 mp3 player to your - computer's USB port. Please read <file:Documentation/usb/rio.rst> - for more information. - - To compile this driver as a module, choose M here: the - module will be called rio500. 
- config USB_LEGOTOWER tristate "USB Lego Infrared Tower support" help diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile index 109f54f5b9aa..0d416eb624bb 100644 --- a/drivers/usb/misc/Makefile +++ b/drivers/usb/misc/Makefile @@ -17,7 +17,6 @@ obj-$(CONFIG_USB_ISIGHTFW) += isight_firmware.o obj-$(CONFIG_USB_LCD) += usblcd.o obj-$(CONFIG_USB_LD) += ldusb.o obj-$(CONFIG_USB_LEGOTOWER) += legousbtower.o -obj-$(CONFIG_USB_RIO500) += rio500.o obj-$(CONFIG_USB_TEST) += usbtest.o obj-$(CONFIG_USB_EHSET_TEST_FIXTURE) += ehset.o obj-$(CONFIG_USB_TRANCEVIBRATOR) += trancevibrator.o diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index 344d523b0502..6f5edb9fc61e 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -75,6 +75,7 @@ struct adu_device { char serial_number[8]; int open_count; /* number of times this port has been opened */ + unsigned long disconnected:1; char *read_buffer_primary; int read_buffer_length; @@ -116,7 +117,7 @@ static void adu_abort_transfers(struct adu_device *dev) { unsigned long flags; - if (dev->udev == NULL) + if (dev->disconnected) return; /* shutdown transfer */ @@ -148,6 +149,7 @@ static void adu_delete(struct adu_device *dev) kfree(dev->read_buffer_secondary); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); + usb_put_dev(dev->udev); kfree(dev); } @@ -243,7 +245,7 @@ static int adu_open(struct inode *inode, struct file *file) } dev = usb_get_intfdata(interface); - if (!dev || !dev->udev) { + if (!dev) { retval = -ENODEV; goto exit_no_device; } @@ -326,7 +328,7 @@ static int adu_release(struct inode *inode, struct file *file) } adu_release_internal(dev); - if (dev->udev == NULL) { + if (dev->disconnected) { /* the device was unplugged before the file was released */ if (!dev->open_count) /* ... 
and we're the last user */ adu_delete(dev); @@ -354,7 +356,7 @@ static ssize_t adu_read(struct file *file, __user char *buffer, size_t count, return -ERESTARTSYS; /* verify that the device wasn't unplugged */ - if (dev->udev == NULL) { + if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto exit; @@ -518,7 +520,7 @@ static ssize_t adu_write(struct file *file, const __user char *buffer, goto exit_nolock; /* verify that the device wasn't unplugged */ - if (dev->udev == NULL) { + if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto exit; @@ -663,7 +665,7 @@ static int adu_probe(struct usb_interface *interface, mutex_init(&dev->mtx); spin_lock_init(&dev->buflock); - dev->udev = udev; + dev->udev = usb_get_dev(udev); init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); @@ -762,14 +764,18 @@ static void adu_disconnect(struct usb_interface *interface) dev = usb_get_intfdata(interface); - mutex_lock(&dev->mtx); /* not interruptible */ - dev->udev = NULL; /* poison */ usb_deregister_dev(interface, &adu_class); - mutex_unlock(&dev->mtx); + + usb_poison_urb(dev->interrupt_in_urb); + usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&adutux_mutex); usb_set_intfdata(interface, NULL); + mutex_lock(&dev->mtx); /* not interruptible */ + dev->disconnected = 1; + mutex_unlock(&dev->mtx); + /* if the device is not opened, then we clean up right now */ if (!dev->open_count) adu_delete(dev); diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index cf5828ce927a..34e6cd6f40d3 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -98,6 +98,7 @@ static void chaoskey_free(struct chaoskey *dev) usb_free_urb(dev->urb); kfree(dev->name); kfree(dev->buf); + usb_put_intf(dev->interface); kfree(dev); } } @@ -145,6 +146,8 @@ static int chaoskey_probe(struct usb_interface *interface, if (dev == NULL) goto out; + dev->interface = usb_get_intf(interface); + dev->buf = kmalloc(size, GFP_KERNEL); if (dev->buf == NULL) @@ -174,8 +177,6 @@ static int chaoskey_probe(struct usb_interface *interface, goto out; } - dev->interface = interface; - dev->in_ep = in_ep; if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index f5bed9f29e56..dce44fbf031f 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -54,11 +54,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); -/* Module parameters */ -static DEFINE_MUTEX(iowarrior_mutex); - static struct usb_driver iowarrior_driver; -static DEFINE_MUTEX(iowarrior_open_disc_lock); /*--------------*/ /* data */ @@ -87,6 +83,7 @@ struct iowarrior { char chip_serial[9]; /* the serial number string of the chip connected */ int report_size; /* number of bytes in a report */ u16 product_id; + struct usb_anchor submitted; }; /*--------------*/ @@ -243,6 +240,7 @@ static inline void iowarrior_delete(struct iowarrior *dev) kfree(dev->int_in_buffer); usb_free_urb(dev->int_in_urb); kfree(dev->read_queue); + usb_put_intf(dev->interface); kfree(dev); } @@ -424,11 +422,13 @@ static ssize_t iowarrior_write(struct file *file, retval = -EFAULT; goto error; } + usb_anchor_urb(int_out_urb, &dev->submitted); retval = usb_submit_urb(int_out_urb, GFP_KERNEL); if (retval) { dev_dbg(&dev->interface->dev, "submit error %d for urb nr.%d\n", retval, atomic_read(&dev->write_busy)); + usb_unanchor_urb(int_out_urb); 
goto error; } /* submit was ok */ @@ -477,8 +477,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, if (!buffer) return -ENOMEM; - /* lock this object */ - mutex_lock(&iowarrior_mutex); mutex_lock(&dev->mutex); /* verify that the device wasn't unplugged */ @@ -571,7 +569,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, error_out: /* unlock the device */ mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_mutex); kfree(buffer); return retval; } @@ -586,27 +583,20 @@ static int iowarrior_open(struct inode *inode, struct file *file) int subminor; int retval = 0; - mutex_lock(&iowarrior_mutex); subminor = iminor(inode); interface = usb_find_interface(&iowarrior_driver, subminor); if (!interface) { - mutex_unlock(&iowarrior_mutex); - printk(KERN_ERR "%s - error, can't find device for minor %d\n", + pr_err("%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } - mutex_lock(&iowarrior_open_disc_lock); dev = usb_get_intfdata(interface); - if (!dev) { - mutex_unlock(&iowarrior_open_disc_lock); - mutex_unlock(&iowarrior_mutex); + if (!dev) return -ENODEV; - } mutex_lock(&dev->mutex); - mutex_unlock(&iowarrior_open_disc_lock); /* Only one process can open each device, no sharing. */ if (dev->opened) { @@ -628,7 +618,6 @@ static int iowarrior_open(struct inode *inode, struct file *file) out: mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_mutex); return retval; } @@ -764,11 +753,13 @@ static int iowarrior_probe(struct usb_interface *interface, init_waitqueue_head(&dev->write_wait); dev->udev = udev; - dev->interface = interface; + dev->interface = usb_get_intf(interface); iface_desc = interface->cur_altsetting; dev->product_id = le16_to_cpu(udev->descriptor.idProduct); + init_usb_anchor(&dev->submitted); + res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint); if (res) { dev_err(&interface->dev, "no interrupt-in endpoint found\n"); @@ -836,7 +827,6 @@ static int iowarrior_probe(struct usb_interface *interface, if (retval) { /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); - usb_set_intfdata(interface, NULL); goto error; } @@ -860,26 +850,15 @@ error: */ static void iowarrior_disconnect(struct usb_interface *interface) { - struct iowarrior *dev; - int minor; - - dev = usb_get_intfdata(interface); - mutex_lock(&iowarrior_open_disc_lock); - usb_set_intfdata(interface, NULL); - /* prevent device read, write and ioctl */ - dev->present = 0; - - minor = dev->minor; - mutex_unlock(&iowarrior_open_disc_lock); - /* give back our minor - this will call close() locks need to be dropped at this point*/ + struct iowarrior *dev = usb_get_intfdata(interface); + int minor = dev->minor; usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ - - mutex_unlock(&dev->mutex); + dev->present = 0; if (dev->opened) { /* There is a process that holds a filedescriptor to the device , @@ -887,10 +866,13 @@ static void iowarrior_disconnect(struct usb_interface *interface) Deleting the device is postponed until close() was called. 
*/ usb_kill_urb(dev->int_in_urb); + usb_kill_anchored_urbs(&dev->submitted); wake_up_interruptible(&dev->read_wait); wake_up_interruptible(&dev->write_wait); + mutex_unlock(&dev->mutex); } else { /* no process is using the device, cleanup now */ + mutex_unlock(&dev->mutex); iowarrior_delete(dev); } diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index 6581774bdfa4..f3108d85e768 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -153,6 +153,7 @@ MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in struct ld_usb { struct mutex mutex; /* locks this structure */ struct usb_interface *intf; /* save off the usb interface pointer */ + unsigned long disconnected:1; int open_count; /* number of times this port has been opened */ @@ -192,12 +193,10 @@ static void ld_usb_abort_transfers(struct ld_usb *dev) /* shutdown transfer */ if (dev->interrupt_in_running) { dev->interrupt_in_running = 0; - if (dev->intf) - usb_kill_urb(dev->interrupt_in_urb); + usb_kill_urb(dev->interrupt_in_urb); } if (dev->interrupt_out_busy) - if (dev->intf) - usb_kill_urb(dev->interrupt_out_urb); + usb_kill_urb(dev->interrupt_out_urb); } /** @@ -205,8 +204,6 @@ static void ld_usb_abort_transfers(struct ld_usb *dev) */ static void ld_usb_delete(struct ld_usb *dev) { - ld_usb_abort_transfers(dev); - /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); @@ -263,7 +260,7 @@ static void ld_usb_interrupt_in_callback(struct urb *urb) resubmit: /* resubmit if we're still running */ - if (dev->interrupt_in_running && !dev->buffer_overflow && dev->intf) { + if (dev->interrupt_in_running && !dev->buffer_overflow) { retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, @@ -392,7 +389,7 @@ static int ld_usb_release(struct inode *inode, struct file *file) retval = -ENODEV; goto unlock_exit; } - if (dev->intf == NULL) { + if (dev->disconnected) { /* the device was unplugged before the file was released */ mutex_unlock(&dev->mutex); /* unlock here as ld_usb_delete frees dev */ @@ -423,7 +420,7 @@ static __poll_t ld_usb_poll(struct file *file, poll_table *wait) dev = file->private_data; - if (!dev->intf) + if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); @@ -462,7 +459,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, } /* verify that the device wasn't unplugged */ - if (dev->intf == NULL) { + if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; @@ -542,7 +539,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, } /* verify that the device wasn't unplugged */ - if (dev->intf == NULL) { + if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; @@ -764,6 +761,9 @@ static void ld_usb_disconnect(struct usb_interface *intf) /* give back our minor */ usb_deregister_dev(intf, &ld_usb_class); + usb_poison_urb(dev->interrupt_in_urb); + usb_poison_urb(dev->interrupt_out_urb); + mutex_lock(&dev->mutex); /* if the device is not opened, then we clean up right now */ @@ -771,7 +771,7 @@ static void ld_usb_disconnect(struct usb_interface *intf) mutex_unlock(&dev->mutex); ld_usb_delete(dev); } else { - dev->intf = NULL; + dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); 
wake_up_interruptible_all(&dev->write_wait); diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 006cf13b2199..9d4c52a7ebe0 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -179,7 +179,6 @@ static const struct usb_device_id tower_table[] = { }; MODULE_DEVICE_TABLE (usb, tower_table); -static DEFINE_MUTEX(open_disc_mutex); #define LEGO_USB_TOWER_MINOR_BASE 160 @@ -191,6 +190,7 @@ struct lego_usb_tower { unsigned char minor; /* the starting minor number for this device */ int open_count; /* number of times this port has been opened */ + unsigned long disconnected:1; char* read_buffer; size_t read_buffer_length; /* this much came in */ @@ -290,14 +290,13 @@ static inline void lego_usb_tower_debug_data(struct device *dev, */ static inline void tower_delete (struct lego_usb_tower *dev) { - tower_abort_transfers (dev); - /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree (dev->read_buffer); kfree (dev->interrupt_in_buffer); kfree (dev->interrupt_out_buffer); + usb_put_dev(dev->udev); kfree (dev); } @@ -332,18 +331,14 @@ static int tower_open (struct inode *inode, struct file *file) goto exit; } - mutex_lock(&open_disc_mutex); dev = usb_get_intfdata(interface); - if (!dev) { - mutex_unlock(&open_disc_mutex); retval = -ENODEV; goto exit; } /* lock this device */ if (mutex_lock_interruptible(&dev->lock)) { - mutex_unlock(&open_disc_mutex); retval = -ERESTARTSYS; goto exit; } @@ -351,12 +346,9 @@ static int tower_open (struct inode *inode, struct file *file) /* allow opening only once */ if (dev->open_count) { - mutex_unlock(&open_disc_mutex); retval = -EBUSY; goto unlock_exit; } - dev->open_count = 1; - mutex_unlock(&open_disc_mutex); /* reset the tower */ result = usb_control_msg (dev->udev, @@ -396,13 +388,14 @@ static int tower_open (struct inode *inode, struct file *file) dev_err(&dev->udev->dev, "Couldn't submit interrupt_in_urb %d\n", retval); dev->interrupt_in_running = 0; - dev->open_count = 0; goto unlock_exit; } /* save device in the file's private structure */ file->private_data = dev; + dev->open_count = 1; + unlock_exit: mutex_unlock(&dev->lock); @@ -423,10 +416,9 @@ static int tower_release (struct inode *inode, struct file *file) if (dev == NULL) { retval = -ENODEV; - goto exit_nolock; + goto exit; } - mutex_lock(&open_disc_mutex); if (mutex_lock_interruptible(&dev->lock)) { retval = -ERESTARTSYS; goto exit; @@ -438,7 +430,8 @@ static int tower_release (struct inode *inode, struct file *file) retval = -ENODEV; goto unlock_exit; } - if (dev->udev == NULL) { + + if (dev->disconnected) { /* the device was unplugged before the file was released */ /* unlock here as tower_delete frees dev */ @@ -456,10 +449,7 @@ static int tower_release (struct inode *inode, struct file *file) unlock_exit: mutex_unlock(&dev->lock); - exit: - mutex_unlock(&open_disc_mutex); -exit_nolock: return retval; } @@ -477,10 +467,9 @@ static void tower_abort_transfers (struct lego_usb_tower *dev) if (dev->interrupt_in_running) { dev->interrupt_in_running = 0; mb(); - if (dev->udev) - usb_kill_urb (dev->interrupt_in_urb); + usb_kill_urb(dev->interrupt_in_urb); } - if (dev->interrupt_out_busy && dev->udev) + if (dev->interrupt_out_busy) usb_kill_urb(dev->interrupt_out_urb); } @@ -516,7 +505,7 @@ static __poll_t tower_poll (struct file *file, poll_table *wait) dev = file->private_data; - if (!dev->udev) + if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); 
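The adutux, chaoskey, iowarrior, ldusb and legousbtower hunks above (and the usblcd, yurex and usb-skeleton ones later in this chunk) converge on the same disconnect/lifetime pattern: take a reference on the underlying device or interface in probe(), set a disconnected flag under the device lock in disconnect() instead of clearing the udev/intf pointer, poison the URBs so no new I/O can be submitted, and defer freeing to the final release() if the device is still open. A minimal kernel-style sketch of that pattern; the foo_* names are illustrative, while usb_get_dev/usb_put_dev, usb_poison_urb, usb_free_urb and the mutex calls are the real APIs these patches use:

	struct foo_dev {
		struct usb_device *udev;	/* reference taken with usb_get_dev() in probe() */
		struct urb *int_in_urb;
		struct mutex lock;
		unsigned long disconnected:1;
		int open_count;
	};

	static void foo_delete(struct foo_dev *dev)
	{
		usb_free_urb(dev->int_in_urb);
		usb_put_dev(dev->udev);		/* drop the probe() reference */
		kfree(dev);
	}

	static void foo_disconnect(struct usb_interface *intf)
	{
		struct foo_dev *dev = usb_get_intfdata(intf);

		/* stop I/O first: poisoned URBs reject further submissions */
		usb_poison_urb(dev->int_in_urb);

		mutex_lock(&dev->lock);
		if (!dev->open_count) {
			mutex_unlock(&dev->lock);
			foo_delete(dev);	/* nothing open: free immediately */
		} else {
			dev->disconnected = 1;	/* I/O paths check this flag, not dev->udev */
			mutex_unlock(&dev->lock);
			/* the last release() sees disconnected and calls foo_delete() */
		}
	}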
@@ -563,7 +552,7 @@ static ssize_t tower_read (struct file *file, char __user *buffer, size_t count, } /* verify that the device wasn't unplugged */ - if (dev->udev == NULL) { + if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto unlock_exit; @@ -649,7 +638,7 @@ static ssize_t tower_write (struct file *file, const char __user *buffer, size_t } /* verify that the device wasn't unplugged */ - if (dev->udev == NULL) { + if (dev->disconnected) { retval = -ENODEV; pr_err("No device or device unplugged %d\n", retval); goto unlock_exit; @@ -759,7 +748,7 @@ static void tower_interrupt_in_callback (struct urb *urb) resubmit: /* resubmit if we're still running */ - if (dev->interrupt_in_running && dev->udev) { + if (dev->interrupt_in_running) { retval = usb_submit_urb (dev->interrupt_in_urb, GFP_ATOMIC); if (retval) dev_err(&dev->udev->dev, @@ -822,8 +811,9 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device mutex_init(&dev->lock); - dev->udev = udev; + dev->udev = usb_get_dev(udev); dev->open_count = 0; + dev->disconnected = 0; dev->read_buffer = NULL; dev->read_buffer_length = 0; @@ -891,8 +881,10 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device get_version_reply, sizeof(*get_version_reply), 1000); - if (result < 0) { - dev_err(idev, "LEGO USB Tower get version control request failed\n"); + if (result < sizeof(*get_version_reply)) { + if (result >= 0) + result = -EIO; + dev_err(idev, "get version request failed: %d\n", result); retval = result; goto error; } @@ -910,7 +902,6 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device if (retval) { /* something prevented us from registering this driver */ dev_err(idev, "Not able to get a minor for this device.\n"); - usb_set_intfdata (interface, NULL); goto error; } dev->minor = interface->minor; @@ -942,23 +933,24 @@ static void tower_disconnect (struct usb_interface *interface) int minor; dev = usb_get_intfdata (interface); - mutex_lock(&open_disc_mutex); - usb_set_intfdata (interface, NULL); minor = dev->minor; - /* give back our minor */ + /* give back our minor and prevent further open() */ usb_deregister_dev (interface, &tower_class); + /* stop I/O */ + usb_poison_urb(dev->interrupt_in_urb); + usb_poison_urb(dev->interrupt_out_urb); + mutex_lock(&dev->lock); - mutex_unlock(&open_disc_mutex); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) { mutex_unlock(&dev->lock); tower_delete (dev); } else { - dev->udev = NULL; + dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c deleted file mode 100644 index 30cae5e1954d..000000000000 --- a/drivers/usb/misc/rio500.c +++ /dev/null @@ -1,554 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* -*- linux-c -*- */ - -/* - * Driver for USB Rio 500 - * - * Cesar Miquel (miquel@df.uba.ar) - * - * based on hp_scanner.c by David E. Nelson (dnelson@jump.net) - * - * Based upon mouse.c (Brad Keryan) and printer.c (Michael Gee). 
- * - * Changelog: - * 30/05/2003 replaced lock/unlock kernel with up/down - * Daniele Bellucci bellucda@tiscali.it - * */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/sched/signal.h> -#include <linux/mutex.h> -#include <linux/errno.h> -#include <linux/random.h> -#include <linux/poll.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/usb.h> -#include <linux/wait.h> - -#include "rio500_usb.h" - -#define DRIVER_AUTHOR "Cesar Miquel <miquel@df.uba.ar>" -#define DRIVER_DESC "USB Rio 500 driver" - -#define RIO_MINOR 64 - -/* stall/wait timeout for rio */ -#define NAK_TIMEOUT (HZ) - -#define IBUF_SIZE 0x1000 - -/* Size of the rio buffer */ -#define OBUF_SIZE 0x10000 - -struct rio_usb_data { - struct usb_device *rio_dev; /* init: probe_rio */ - unsigned int ifnum; /* Interface number of the USB device */ - int isopen; /* nz if open */ - int present; /* Device is present on the bus */ - char *obuf, *ibuf; /* transfer buffers */ - char bulk_in_ep, bulk_out_ep; /* Endpoint assignments */ - wait_queue_head_t wait_q; /* for timeouts */ -}; - -static DEFINE_MUTEX(rio500_mutex); -static struct rio_usb_data rio_instance; - -static int open_rio(struct inode *inode, struct file *file) -{ - struct rio_usb_data *rio = &rio_instance; - - /* against disconnect() */ - mutex_lock(&rio500_mutex); - - if (rio->isopen || !rio->present) { - mutex_unlock(&rio500_mutex); - return -EBUSY; - } - rio->isopen = 1; - - init_waitqueue_head(&rio->wait_q); - - - dev_info(&rio->rio_dev->dev, "Rio opened.\n"); - mutex_unlock(&rio500_mutex); - - return 0; -} - -static int close_rio(struct inode *inode, struct file *file) -{ - struct rio_usb_data *rio = &rio_instance; - - /* against disconnect() */ - mutex_lock(&rio500_mutex); - - rio->isopen = 0; - if (!rio->present) { - /* cleanup has been delayed */ - kfree(rio->ibuf); - kfree(rio->obuf); - rio->ibuf = NULL; - rio->obuf = NULL; - } else { - dev_info(&rio->rio_dev->dev, "Rio closed.\n"); - } - mutex_unlock(&rio500_mutex); - return 0; -} - -static long ioctl_rio(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct RioCommand rio_cmd; - struct rio_usb_data *rio = &rio_instance; - void __user *data; - unsigned char *buffer; - int result, requesttype; - int retries; - int retval=0; - - mutex_lock(&rio500_mutex); - /* Sanity check to make sure rio is connected, powered, etc */ - if (rio->present == 0 || rio->rio_dev == NULL) { - retval = -ENODEV; - goto err_out; - } - - switch (cmd) { - case RIO_RECV_COMMAND: - data = (void __user *) arg; - if (data == NULL) - break; - if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) { - retval = -EFAULT; - goto err_out; - } - if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) { - retval = -EINVAL; - goto err_out; - } - buffer = (unsigned char *) __get_free_page(GFP_KERNEL); - if (buffer == NULL) { - retval = -ENOMEM; - goto err_out; - } - if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) { - retval = -EFAULT; - free_page((unsigned long) buffer); - goto err_out; - } - - requesttype = rio_cmd.requesttype | USB_DIR_IN | - USB_TYPE_VENDOR | USB_RECIP_DEVICE; - dev_dbg(&rio->rio_dev->dev, - "sending command:reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n", - requesttype, rio_cmd.request, rio_cmd.value, - rio_cmd.index, rio_cmd.length); - /* Send rio control message */ - retries = 3; - while (retries) { - result = usb_control_msg(rio->rio_dev, - usb_rcvctrlpipe(rio-> rio_dev, 0), - rio_cmd.request, - requesttype, - rio_cmd.value, - 
rio_cmd.index, buffer, - rio_cmd.length, - jiffies_to_msecs(rio_cmd.timeout)); - if (result == -ETIMEDOUT) - retries--; - else if (result < 0) { - dev_err(&rio->rio_dev->dev, - "Error executing ioctrl. code = %d\n", - result); - retries = 0; - } else { - dev_dbg(&rio->rio_dev->dev, - "Executed ioctl. Result = %d (data=%02x)\n", - result, buffer[0]); - if (copy_to_user(rio_cmd.buffer, buffer, - rio_cmd.length)) { - free_page((unsigned long) buffer); - retval = -EFAULT; - goto err_out; - } - retries = 0; - } - - /* rio_cmd.buffer contains a raw stream of single byte - data which has been returned from rio. Data is - interpreted at application level. For data that - will be cast to data types longer than 1 byte, data - will be little_endian and will potentially need to - be swapped at the app level */ - - } - free_page((unsigned long) buffer); - break; - - case RIO_SEND_COMMAND: - data = (void __user *) arg; - if (data == NULL) - break; - if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) { - retval = -EFAULT; - goto err_out; - } - if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) { - retval = -EINVAL; - goto err_out; - } - buffer = (unsigned char *) __get_free_page(GFP_KERNEL); - if (buffer == NULL) { - retval = -ENOMEM; - goto err_out; - } - if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) { - free_page((unsigned long)buffer); - retval = -EFAULT; - goto err_out; - } - - requesttype = rio_cmd.requesttype | USB_DIR_OUT | - USB_TYPE_VENDOR | USB_RECIP_DEVICE; - dev_dbg(&rio->rio_dev->dev, - "sending command: reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n", - requesttype, rio_cmd.request, rio_cmd.value, - rio_cmd.index, rio_cmd.length); - /* Send rio control message */ - retries = 3; - while (retries) { - result = usb_control_msg(rio->rio_dev, - usb_sndctrlpipe(rio-> rio_dev, 0), - rio_cmd.request, - requesttype, - rio_cmd.value, - rio_cmd.index, buffer, - rio_cmd.length, - jiffies_to_msecs(rio_cmd.timeout)); - if (result == -ETIMEDOUT) - retries--; - else if (result < 0) { - dev_err(&rio->rio_dev->dev, - "Error executing ioctrl. code = %d\n", - result); - retries = 0; - } else { - dev_dbg(&rio->rio_dev->dev, - "Executed ioctl. Result = %d\n", result); - retries = 0; - - } - - } - free_page((unsigned long) buffer); - break; - - default: - retval = -ENOTTY; - break; - } - - -err_out: - mutex_unlock(&rio500_mutex); - return retval; -} - -static ssize_t -write_rio(struct file *file, const char __user *buffer, - size_t count, loff_t * ppos) -{ - DEFINE_WAIT(wait); - struct rio_usb_data *rio = &rio_instance; - - unsigned long copy_size; - unsigned long bytes_written = 0; - unsigned int partial; - - int result = 0; - int maxretry; - int errn = 0; - int intr; - - intr = mutex_lock_interruptible(&rio500_mutex); - if (intr) - return -EINTR; - /* Sanity check to make sure rio is connected, powered, etc */ - if (rio->present == 0 || rio->rio_dev == NULL) { - mutex_unlock(&rio500_mutex); - return -ENODEV; - } - - - - do { - unsigned long thistime; - char *obuf = rio->obuf; - - thistime = copy_size = - (count >= OBUF_SIZE) ? OBUF_SIZE : count; - if (copy_from_user(rio->obuf, buffer, copy_size)) { - errn = -EFAULT; - goto error; - } - maxretry = 5; - while (thistime) { - if (!rio->rio_dev) { - errn = -ENODEV; - goto error; - } - if (signal_pending(current)) { - mutex_unlock(&rio500_mutex); - return bytes_written ? 
bytes_written : -EINTR; - } - - result = usb_bulk_msg(rio->rio_dev, - usb_sndbulkpipe(rio->rio_dev, 2), - obuf, thistime, &partial, 5000); - - dev_dbg(&rio->rio_dev->dev, - "write stats: result:%d thistime:%lu partial:%u\n", - result, thistime, partial); - - if (result == -ETIMEDOUT) { /* NAK - so hold for a while */ - if (!maxretry--) { - errn = -ETIME; - goto error; - } - prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE); - schedule_timeout(NAK_TIMEOUT); - finish_wait(&rio->wait_q, &wait); - continue; - } else if (!result && partial) { - obuf += partial; - thistime -= partial; - } else - break; - } - if (result) { - dev_err(&rio->rio_dev->dev, "Write Whoops - %x\n", - result); - errn = -EIO; - goto error; - } - bytes_written += copy_size; - count -= copy_size; - buffer += copy_size; - } while (count > 0); - - mutex_unlock(&rio500_mutex); - - return bytes_written ? bytes_written : -EIO; - -error: - mutex_unlock(&rio500_mutex); - return errn; -} - -static ssize_t -read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos) -{ - DEFINE_WAIT(wait); - struct rio_usb_data *rio = &rio_instance; - ssize_t read_count; - unsigned int partial; - int this_read; - int result; - int maxretry = 10; - char *ibuf; - int intr; - - intr = mutex_lock_interruptible(&rio500_mutex); - if (intr) - return -EINTR; - /* Sanity check to make sure rio is connected, powered, etc */ - if (rio->present == 0 || rio->rio_dev == NULL) { - mutex_unlock(&rio500_mutex); - return -ENODEV; - } - - ibuf = rio->ibuf; - - read_count = 0; - - - while (count > 0) { - if (signal_pending(current)) { - mutex_unlock(&rio500_mutex); - return read_count ? read_count : -EINTR; - } - if (!rio->rio_dev) { - mutex_unlock(&rio500_mutex); - return -ENODEV; - } - this_read = (count >= IBUF_SIZE) ? IBUF_SIZE : count; - - result = usb_bulk_msg(rio->rio_dev, - usb_rcvbulkpipe(rio->rio_dev, 1), - ibuf, this_read, &partial, - 8000); - - dev_dbg(&rio->rio_dev->dev, - "read stats: result:%d this_read:%u partial:%u\n", - result, this_read, partial); - - if (partial) { - count = this_read = partial; - } else if (result == -ETIMEDOUT || result == 15) { /* FIXME: 15 ??? 
*/ - if (!maxretry--) { - mutex_unlock(&rio500_mutex); - dev_err(&rio->rio_dev->dev, - "read_rio: maxretry timeout\n"); - return -ETIME; - } - prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE); - schedule_timeout(NAK_TIMEOUT); - finish_wait(&rio->wait_q, &wait); - continue; - } else if (result != -EREMOTEIO) { - mutex_unlock(&rio500_mutex); - dev_err(&rio->rio_dev->dev, - "Read Whoops - result:%d partial:%u this_read:%u\n", - result, partial, this_read); - return -EIO; - } else { - mutex_unlock(&rio500_mutex); - return (0); - } - - if (this_read) { - if (copy_to_user(buffer, ibuf, this_read)) { - mutex_unlock(&rio500_mutex); - return -EFAULT; - } - count -= this_read; - read_count += this_read; - buffer += this_read; - } - } - mutex_unlock(&rio500_mutex); - return read_count; -} - -static const struct file_operations usb_rio_fops = { - .owner = THIS_MODULE, - .read = read_rio, - .write = write_rio, - .unlocked_ioctl = ioctl_rio, - .open = open_rio, - .release = close_rio, - .llseek = noop_llseek, -}; - -static struct usb_class_driver usb_rio_class = { - .name = "rio500%d", - .fops = &usb_rio_fops, - .minor_base = RIO_MINOR, -}; - -static int probe_rio(struct usb_interface *intf, - const struct usb_device_id *id) -{ - struct usb_device *dev = interface_to_usbdev(intf); - struct rio_usb_data *rio = &rio_instance; - int retval = -ENOMEM; - char *ibuf, *obuf; - - if (rio->present) { - dev_info(&intf->dev, "Second USB Rio at address %d refused\n", dev->devnum); - return -EBUSY; - } - dev_info(&intf->dev, "USB Rio found at address %d\n", dev->devnum); - - obuf = kmalloc(OBUF_SIZE, GFP_KERNEL); - if (!obuf) { - dev_err(&dev->dev, - "probe_rio: Not enough memory for the output buffer\n"); - goto err_obuf; - } - dev_dbg(&intf->dev, "obuf address: %p\n", obuf); - - ibuf = kmalloc(IBUF_SIZE, GFP_KERNEL); - if (!ibuf) { - dev_err(&dev->dev, - "probe_rio: Not enough memory for the input buffer\n"); - goto err_ibuf; - } - dev_dbg(&intf->dev, "ibuf address: %p\n", ibuf); - - mutex_lock(&rio500_mutex); - rio->rio_dev = dev; - rio->ibuf = ibuf; - rio->obuf = obuf; - rio->present = 1; - mutex_unlock(&rio500_mutex); - - retval = usb_register_dev(intf, &usb_rio_class); - if (retval) { - dev_err(&dev->dev, - "Not able to get a minor for this device.\n"); - goto err_register; - } - - usb_set_intfdata(intf, rio); - return retval; - - err_register: - mutex_lock(&rio500_mutex); - rio->present = 0; - mutex_unlock(&rio500_mutex); - err_ibuf: - kfree(obuf); - err_obuf: - return retval; -} - -static void disconnect_rio(struct usb_interface *intf) -{ - struct rio_usb_data *rio = usb_get_intfdata (intf); - - usb_set_intfdata (intf, NULL); - if (rio) { - usb_deregister_dev(intf, &usb_rio_class); - - mutex_lock(&rio500_mutex); - if (rio->isopen) { - rio->isopen = 0; - /* better let it finish - the release will do whats needed */ - rio->rio_dev = NULL; - mutex_unlock(&rio500_mutex); - return; - } - kfree(rio->ibuf); - kfree(rio->obuf); - - dev_info(&intf->dev, "USB Rio disconnected.\n"); - - rio->present = 0; - mutex_unlock(&rio500_mutex); - } -} - -static const struct usb_device_id rio_table[] = { - { USB_DEVICE(0x0841, 1) }, /* Rio 500 */ - { } /* Terminating entry */ -}; - -MODULE_DEVICE_TABLE (usb, rio_table); - -static struct usb_driver rio_driver = { - .name = "rio500", - .probe = probe_rio, - .disconnect = disconnect_rio, - .id_table = rio_table, -}; - -module_usb_driver(rio_driver); - -MODULE_AUTHOR( DRIVER_AUTHOR ); -MODULE_DESCRIPTION( DRIVER_DESC ); -MODULE_LICENSE("GPL"); - diff --git 
a/drivers/usb/misc/rio500_usb.h b/drivers/usb/misc/rio500_usb.h deleted file mode 100644 index 6db7a5863496..000000000000 --- a/drivers/usb/misc/rio500_usb.h +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* ---------------------------------------------------------------------- - Copyright (C) 2000 Cesar Miquel (miquel@df.uba.ar) - ---------------------------------------------------------------------- */ - -#define RIO_SEND_COMMAND 0x1 -#define RIO_RECV_COMMAND 0x2 - -#define RIO_DIR_OUT 0x0 -#define RIO_DIR_IN 0x1 - -struct RioCommand { - short length; - int request; - int requesttype; - int value; - int index; - void __user *buffer; - int timeout; -}; diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index 9ba4a4e68d91..61e9e987fe4a 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/errno.h> #include <linux/mutex.h> +#include <linux/rwsem.h> #include <linux/uaccess.h> #include <linux/usb.h> @@ -29,16 +30,12 @@ #define IOCTL_GET_DRV_VERSION 2 -static DEFINE_MUTEX(lcd_mutex); static const struct usb_device_id id_table[] = { { .idVendor = 0x10D2, .match_flags = USB_DEVICE_ID_MATCH_VENDOR, }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); -static DEFINE_MUTEX(open_disc_mutex); - - struct usb_lcd { struct usb_device *udev; /* init: probe_lcd */ struct usb_interface *interface; /* the interface for @@ -57,6 +54,8 @@ struct usb_lcd { using up all RAM */ struct usb_anchor submitted; /* URBs to wait for before suspend */ + struct rw_semaphore io_rwsem; + unsigned long disconnected:1; }; #define to_lcd_dev(d) container_of(d, struct usb_lcd, kref) @@ -81,40 +80,29 @@ static int lcd_open(struct inode *inode, struct file *file) struct usb_interface *interface; int subminor, r; - mutex_lock(&lcd_mutex); subminor = iminor(inode); interface = usb_find_interface(&lcd_driver, subminor); if (!interface) { - mutex_unlock(&lcd_mutex); - printk(KERN_ERR "USBLCD: %s - error, can't find device for minor %d\n", + pr_err("USBLCD: %s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } - mutex_lock(&open_disc_mutex); dev = usb_get_intfdata(interface); - if (!dev) { - mutex_unlock(&open_disc_mutex); - mutex_unlock(&lcd_mutex); - return -ENODEV; - } /* increment our usage count for the device */ kref_get(&dev->kref); - mutex_unlock(&open_disc_mutex); /* grab a power reference */ r = usb_autopm_get_interface(interface); if (r < 0) { kref_put(&dev->kref, lcd_delete); - mutex_unlock(&lcd_mutex); return r; } /* save our object in the file's private structure */ file->private_data = dev; - mutex_unlock(&lcd_mutex); return 0; } @@ -142,6 +130,13 @@ static ssize_t lcd_read(struct file *file, char __user * buffer, dev = file->private_data; + down_read(&dev->io_rwsem); + + if (dev->disconnected) { + retval = -ENODEV; + goto out_up_io; + } + /* do a blocking bulk read to get data from the device */ retval = usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, @@ -158,6 +153,9 @@ static ssize_t lcd_read(struct file *file, char __user * buffer, retval = bytes_read; } +out_up_io: + up_read(&dev->io_rwsem); + return retval; } @@ -173,14 +171,12 @@ static long lcd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (cmd) { case IOCTL_GET_HARD_VERSION: - mutex_lock(&lcd_mutex); bcdDevice = le16_to_cpu((dev->udev)->descriptor.bcdDevice); sprintf(buf, "%1d%1d.%1d%1d", (bcdDevice & 0xF000)>>12, (bcdDevice & 0xF00)>>8, (bcdDevice & 0xF0)>>4, (bcdDevice & 0xF)); - 
mutex_unlock(&lcd_mutex); if (copy_to_user((void __user *)arg, buf, strlen(buf)) != 0) return -EFAULT; break; @@ -237,11 +233,18 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer, if (r < 0) return -EINTR; + down_read(&dev->io_rwsem); + + if (dev->disconnected) { + retval = -ENODEV; + goto err_up_io; + } + /* create a urb, and a buffer for it, and copy the data to the urb */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { retval = -ENOMEM; - goto err_no_buf; + goto err_up_io; } buf = usb_alloc_coherent(dev->udev, count, GFP_KERNEL, @@ -278,6 +281,7 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer, the USB core will eventually free it entirely */ usb_free_urb(urb); + up_read(&dev->io_rwsem); exit: return count; error_unanchor: @@ -285,7 +289,8 @@ error_unanchor: error: usb_free_coherent(dev->udev, count, buf, urb->transfer_dma); usb_free_urb(urb); -err_no_buf: +err_up_io: + up_read(&dev->io_rwsem); up(&dev->limit_sem); return retval; } @@ -325,6 +330,7 @@ static int lcd_probe(struct usb_interface *interface, kref_init(&dev->kref); sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES); + init_rwsem(&dev->io_rwsem); init_usb_anchor(&dev->submitted); dev->udev = usb_get_dev(interface_to_usbdev(interface)); @@ -365,7 +371,6 @@ static int lcd_probe(struct usb_interface *interface, /* something prevented us from registering this driver */ dev_err(&interface->dev, "Not able to get a minor for this device.\n"); - usb_set_intfdata(interface, NULL); goto error; } @@ -411,17 +416,18 @@ static int lcd_resume(struct usb_interface *intf) static void lcd_disconnect(struct usb_interface *interface) { - struct usb_lcd *dev; + struct usb_lcd *dev = usb_get_intfdata(interface); int minor = interface->minor; - mutex_lock(&open_disc_mutex); - dev = usb_get_intfdata(interface); - usb_set_intfdata(interface, NULL); - mutex_unlock(&open_disc_mutex); - /* give back our minor */ usb_deregister_dev(interface, &lcd_class); + down_write(&dev->io_rwsem); + dev->disconnected = 1; + up_write(&dev->io_rwsem); + + usb_kill_anchored_urbs(&dev->submitted); + /* decrement our usage count */ kref_put(&dev->kref, lcd_delete); diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 6715a128e6c8..be0505b8b5d4 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -60,6 +60,7 @@ struct usb_yurex { struct kref kref; struct mutex io_mutex; + unsigned long disconnected:1; struct fasync_struct *async_queue; wait_queue_head_t waitq; @@ -107,6 +108,7 @@ static void yurex_delete(struct kref *kref) dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } + usb_put_intf(dev->interface); usb_put_dev(dev->udev); kfree(dev); } @@ -132,6 +134,7 @@ static void yurex_interrupt(struct urb *urb) switch (status) { case 0: /*success*/ break; + /* The device is terminated or messed up, give up */ case -EOVERFLOW: dev_err(&dev->interface->dev, "%s - overflow with length %d, actual length is %d\n", @@ -140,12 +143,13 @@ static void yurex_interrupt(struct urb *urb) case -ENOENT: case -ESHUTDOWN: case -EILSEQ: - /* The device is terminated, clean up */ + case -EPROTO: + case -ETIME: return; default: dev_err(&dev->interface->dev, "%s - unknown status received: %d\n", __func__, status); - goto exit; + return; } /* handle received message */ @@ -177,7 +181,6 @@ static void yurex_interrupt(struct urb *urb) break; } -exit: retval = usb_submit_urb(dev->urb, GFP_ATOMIC); if (retval) { dev_err(&dev->interface->dev, "%s - usb_submit_urb failed: %d\n", @@ -204,7 
+207,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_ init_waitqueue_head(&dev->waitq); dev->udev = usb_get_dev(interface_to_usbdev(interface)); - dev->interface = interface; + dev->interface = usb_get_intf(interface); /* set up the endpoint information */ iface_desc = interface->cur_altsetting; @@ -315,8 +318,9 @@ static void yurex_disconnect(struct usb_interface *interface) /* prevent more I/O from starting */ usb_poison_urb(dev->urb); + usb_poison_urb(dev->cntl_urb); mutex_lock(&dev->io_mutex); - dev->interface = NULL; + dev->disconnected = 1; mutex_unlock(&dev->io_mutex); /* wakeup waiters */ @@ -404,7 +408,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, dev = file->private_data; mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* already disconnected */ + if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); return -ENODEV; } @@ -439,7 +443,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, goto error; mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* already disconnected */ + if (dev->disconnected) { /* already disconnected */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index d1a0a35ecfff..0824099b905e 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -211,6 +211,7 @@ struct usbhs_priv; /* DCPCTR */ #define BSTS (1 << 15) /* Buffer Status */ #define SUREQ (1 << 14) /* Sending SETUP Token */ +#define INBUFM (1 << 14) /* (PIPEnCTR) Transfer Buffer Monitor */ #define CSSTS (1 << 12) /* CSSTS Status */ #define ACLRM (1 << 9) /* Buffer Auto-Clear Mode */ #define SQCLR (1 << 8) /* Toggle Bit Clear */ diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 2a01ceb71641..86637cd066cf 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -89,7 +89,7 @@ static void __usbhsf_pkt_del(struct usbhs_pkt *pkt) list_del_init(&pkt->node); } -static struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe) +struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe) { return list_first_entry_or_null(&pipe->list, struct usbhs_pkt, node); } diff --git a/drivers/usb/renesas_usbhs/fifo.h b/drivers/usb/renesas_usbhs/fifo.h index 88d1816bcda2..c3d3cc35cee0 100644 --- a/drivers/usb/renesas_usbhs/fifo.h +++ b/drivers/usb/renesas_usbhs/fifo.h @@ -97,5 +97,6 @@ void usbhs_pkt_push(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt, void *buf, int len, int zero, int sequence); struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt); void usbhs_pkt_start(struct usbhs_pipe *pipe); +struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe); #endif /* RENESAS_USB_FIFO_H */ diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 4d571a5205e2..e5ef56991dba 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -722,8 +722,7 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge) struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv); struct device *dev = usbhsg_gpriv_to_dev(gpriv); unsigned long flags; - - usbhsg_pipe_disable(uep); + int ret = 0; dev_dbg(dev, "set halt %d (pipe %d)\n", halt, usbhs_pipe_number(pipe)); @@ -731,6 +730,18 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge) /******************** spin 
lock ********************/ usbhs_lock(priv, flags); + /* + * According to usb_ep_set_halt()'s description, this function should + * return -EAGAIN if the IN endpoint has any queue or data. Note + * that the usbhs_pipe_is_dir_in() returns false if the pipe is an + * IN endpoint in the gadget mode. + */ + if (!usbhs_pipe_is_dir_in(pipe) && (__usbhsf_pkt_get(pipe) || + usbhs_pipe_contains_transmittable_data(pipe))) { + ret = -EAGAIN; + goto out; + } + if (halt) usbhs_pipe_stall(pipe); else @@ -741,10 +752,11 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge) else usbhsg_status_clr(gpriv, USBHSG_STATUS_WEDGE); +out: usbhs_unlock(priv, flags); /******************** spin unlock ******************/ - return 0; + return ret; } static int usbhsg_ep_set_halt(struct usb_ep *ep, int value) diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c index c4922b96c93b..9e5afdde1adb 100644 --- a/drivers/usb/renesas_usbhs/pipe.c +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -277,6 +277,21 @@ int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe) return -EBUSY; } +bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe) +{ + u16 val; + + /* Do not support for DCP pipe */ + if (usbhs_pipe_is_dcp(pipe)) + return false; + + val = usbhsp_pipectrl_get(pipe); + if (val & INBUFM) + return true; + + return false; +} + /* * PID ctrl */ diff --git a/drivers/usb/renesas_usbhs/pipe.h b/drivers/usb/renesas_usbhs/pipe.h index 3080423e600c..3b130529408b 100644 --- a/drivers/usb/renesas_usbhs/pipe.h +++ b/drivers/usb/renesas_usbhs/pipe.h @@ -83,6 +83,7 @@ void usbhs_pipe_clear(struct usbhs_pipe *pipe); void usbhs_pipe_clear_without_sequence(struct usbhs_pipe *pipe, int needs_bfre, int bfre_enable); int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe); +bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe); void usbhs_pipe_enable(struct usbhs_pipe *pipe); void usbhs_pipe_disable(struct usbhs_pipe *pipe); void usbhs_pipe_stall(struct usbhs_pipe *pipe); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index f0688c44b04c..25e81faf4c24 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1030,6 +1030,9 @@ static const struct usb_device_id id_table_combined[] = { /* EZPrototypes devices */ { USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) }, { USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) }, + /* Sienna devices */ + { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, + { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f12d806220b4..22d66217cb41 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -39,6 +39,9 @@ #define FTDI_LUMEL_PD12_PID 0x6002 +/* Sienna Serial Interface by Secyourit GmbH */ +#define FTDI_SIENNA_PID 0x8348 + /* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */ #define CYBER_CORTEX_AV_PID 0x8698 @@ -689,6 +692,12 @@ #define BANDB_ZZ_PROG1_USB_PID 0xBA02 /* + * Echelon USB Serial Interface + */ +#define ECHELON_VID 0x0920 +#define ECHELON_U20_PID 0x7500 + +/* * Intrepid Control Systems (http://www.intrepidcs.com/) ValueCAN and NeoVI */ #define INTREPID_VID 0x093C diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index d34779fe4a8d..e66a59ef43a1 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1741,8 +1741,8 @@ static struct urb 
*keyspan_setup_urb(struct usb_serial *serial, int endpoint, ep_desc = find_ep(serial, endpoint); if (!ep_desc) { - /* leak the urb, something's wrong and the callers don't care */ - return urb; + usb_free_urb(urb); + return NULL; } if (usb_endpoint_xfer_int(ep_desc)) { ep_type_name = "INT"; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 38e920ac7f82..06ab016be0b6 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -419,6 +419,7 @@ static void option_instat_callback(struct urb *urb); #define CINTERION_PRODUCT_PH8_AUDIO 0x0083 #define CINTERION_PRODUCT_AHXX_2RMNET 0x0084 #define CINTERION_PRODUCT_AHXX_AUDIO 0x0085 +#define CINTERION_PRODUCT_CLS8 0x00b0 /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c @@ -1154,6 +1155,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1050, 0xff), /* Telit FN980 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1051, 0xff), /* Telit FN980 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1052, 0xff), /* Telit FN980 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1053, 0xff), /* Telit FN980 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -1847,6 +1856,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_2RMNET, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff), + .driver_info = RSVD(0) | RSVD(4) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, { USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) }, diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a3179fea38c8..8f066bb55d7d 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -314,10 +314,7 @@ static void serial_cleanup(struct tty_struct *tty) serial = port->serial; owner = serial->type->driver.owner; - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - mutex_unlock(&serial->disc_mutex); + usb_autopm_put_interface(serial->interface); usb_serial_put(serial); module_put(owner); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 96562744101c..5f61d9977a15 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4409,18 +4409,20 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { - port->typec_caps.data = typec_find_port_data_role(cap_str); - if (port->typec_caps.data < 0) - return -EINVAL; + ret = typec_find_port_data_role(cap_str); + if (ret < 0) + return ret; + port->typec_caps.data = ret; } ret = fwnode_property_read_string(fwnode, "power-role", &cap_str); if 
(ret < 0) return ret; - port->typec_caps.type = typec_find_port_power_role(cap_str); - if (port->typec_caps.type < 0) - return -EINVAL; + ret = typec_find_port_power_role(cap_str); + if (ret < 0) + return ret; + port->typec_caps.type = ret; port->port_type = port->typec_caps.type; if (port->port_type == TYPEC_PORT_SNK) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 6c103697c582..d99700cb4dca 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -75,6 +75,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt) if (cur != 0xff) { mutex_unlock(&dp->con->lock); + if (dp->con->port_altmode[cur] == alt) + return 0; return -EBUSY; } diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index 907e20e1a71e..d772fce51905 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -195,7 +195,6 @@ struct ucsi_ccg { /* fw build with vendor information */ u16 fw_build; - bool run_isr; /* flag to call ISR routine during resume */ struct work_struct pm_work; }; @@ -224,18 +223,6 @@ static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len) if (quirks && quirks->max_read_len) max_read_len = quirks->max_read_len; - if (uc->fw_build == CCG_FW_BUILD_NVIDIA && - uc->fw_version <= CCG_OLD_FW_VERSION) { - mutex_lock(&uc->lock); - /* - * Do not schedule pm_work to run ISR in - * ucsi_ccg_runtime_resume() after pm_runtime_get_sync() - * since we are already in ISR path. - */ - uc->run_isr = false; - mutex_unlock(&uc->lock); - } - pm_runtime_get_sync(uc->dev); while (rem_len > 0) { msgs[1].buf = &data[len - rem_len]; @@ -278,18 +265,6 @@ static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len) msgs[0].len = len + sizeof(rab); msgs[0].buf = buf; - if (uc->fw_build == CCG_FW_BUILD_NVIDIA && - uc->fw_version <= CCG_OLD_FW_VERSION) { - mutex_lock(&uc->lock); - /* - * Do not schedule pm_work to run ISR in - * ucsi_ccg_runtime_resume() after pm_runtime_get_sync() - * since we are already in ISR path. - */ - uc->run_isr = false; - mutex_unlock(&uc->lock); - } - pm_runtime_get_sync(uc->dev); status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); if (status < 0) { @@ -1130,7 +1105,6 @@ static int ucsi_ccg_probe(struct i2c_client *client, uc->ppm.sync = ucsi_ccg_sync; uc->dev = dev; uc->client = client; - uc->run_isr = true; mutex_init(&uc->lock); INIT_WORK(&uc->work, ccg_update_firmware); INIT_WORK(&uc->pm_work, ccg_pm_workaround_work); @@ -1188,6 +1162,8 @@ static int ucsi_ccg_probe(struct i2c_client *client, pm_runtime_set_active(uc->dev); pm_runtime_enable(uc->dev); + pm_runtime_use_autosuspend(uc->dev); + pm_runtime_set_autosuspend_delay(uc->dev, 5000); pm_runtime_idle(uc->dev); return 0; @@ -1229,7 +1205,6 @@ static int ucsi_ccg_runtime_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); struct ucsi_ccg *uc = i2c_get_clientdata(client); - bool schedule = true; /* * Firmware version 3.1.10 or earlier, built for NVIDIA has known issue @@ -1237,17 +1212,8 @@ static int ucsi_ccg_runtime_resume(struct device *dev) * Schedule a work to call ISR as a workaround. 
*/ if (uc->fw_build == CCG_FW_BUILD_NVIDIA && - uc->fw_version <= CCG_OLD_FW_VERSION) { - mutex_lock(&uc->lock); - if (!uc->run_isr) { - uc->run_isr = true; - schedule = false; - } - mutex_unlock(&uc->lock); - - if (schedule) - schedule_work(&uc->pm_work); - } + uc->fw_version <= CCG_OLD_FW_VERSION) + schedule_work(&uc->pm_work); return 0; } diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c index c31d17d05810..2dc58766273a 100644 --- a/drivers/usb/usb-skeleton.c +++ b/drivers/usb/usb-skeleton.c @@ -61,6 +61,7 @@ struct usb_skel { spinlock_t err_lock; /* lock for errors */ struct kref kref; struct mutex io_mutex; /* synchronize I/O with disconnect */ + unsigned long disconnected:1; wait_queue_head_t bulk_in_wait; /* to wait for an ongoing read */ }; #define to_skel_dev(d) container_of(d, struct usb_skel, kref) @@ -73,6 +74,7 @@ static void skel_delete(struct kref *kref) struct usb_skel *dev = to_skel_dev(kref); usb_free_urb(dev->bulk_in_urb); + usb_put_intf(dev->interface); usb_put_dev(dev->udev); kfree(dev->bulk_in_buffer); kfree(dev); @@ -124,10 +126,7 @@ static int skel_release(struct inode *inode, struct file *file) return -ENODEV; /* allow the device to be autosuspended */ - mutex_lock(&dev->io_mutex); - if (dev->interface) - usb_autopm_put_interface(dev->interface); - mutex_unlock(&dev->io_mutex); + usb_autopm_put_interface(dev->interface); /* decrement the count on our device */ kref_put(&dev->kref, skel_delete); @@ -231,8 +230,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count, dev = file->private_data; - /* if we cannot read at all, return EOF */ - if (!dev->bulk_in_urb || !count) + if (!count) return 0; /* no concurrent readers */ @@ -240,7 +238,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count, if (rv < 0) return rv; - if (!dev->interface) { /* disconnect() was called */ + if (dev->disconnected) { /* disconnect() was called */ rv = -ENODEV; goto exit; } @@ -422,7 +420,7 @@ static ssize_t skel_write(struct file *file, const char *user_buffer, /* this lock makes sure we don't submit URBs to gone devices */ mutex_lock(&dev->io_mutex); - if (!dev->interface) { /* disconnect() was called */ + if (dev->disconnected) { /* disconnect() was called */ mutex_unlock(&dev->io_mutex); retval = -ENODEV; goto error; @@ -507,7 +505,7 @@ static int skel_probe(struct usb_interface *interface, init_waitqueue_head(&dev->bulk_in_wait); dev->udev = usb_get_dev(interface_to_usbdev(interface)); - dev->interface = interface; + dev->interface = usb_get_intf(interface); /* set up the endpoint information */ /* use only the first bulk-in and bulk-out endpoints */ @@ -573,9 +571,10 @@ static void skel_disconnect(struct usb_interface *interface) /* prevent more I/O from starting */ mutex_lock(&dev->io_mutex); - dev->interface = NULL; + dev->disconnected = 1; mutex_unlock(&dev->io_mutex); + usb_kill_urb(dev->bulk_in_urb); usb_kill_anchored_urbs(&dev->submitted); /* decrement our usage count */ diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 585a84d319bd..65850e9c7190 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -1195,12 +1195,12 @@ static int vhci_start(struct usb_hcd *hcd) if (id == 0 && usb_hcd_is_primary_hcd(hcd)) { err = vhci_init_attr_group(); if (err) { - pr_err("init attr group\n"); + dev_err(hcd_dev(hcd), "init attr group failed, err = %d\n", err); return err; } err = sysfs_create_group(&hcd_dev(hcd)->kobj, &vhci_attr_group); if (err) { - pr_err("create sysfs files\n"); 
+ dev_err(hcd_dev(hcd), "create sysfs files failed, err = %d\n", err); vhci_finish_attr_group(); return err; } diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 7804869c6a31..056308008288 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -161,6 +161,7 @@ static int vhost_test_release(struct inode *inode, struct file *f) vhost_test_stop(n, &private); vhost_test_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ @@ -237,6 +238,7 @@ static long vhost_test_reset_owner(struct vhost_test *n) } vhost_test_stop(n, &priv); vhost_test_flush(n); + vhost_dev_stop(&n->dev); vhost_dev_reset_owner(&n->dev, umem); done: mutex_unlock(&n->dev.mutex); diff --git a/drivers/video/logo/Makefile b/drivers/video/logo/Makefile index 228a89b9bdd1..16f60c1e1766 100644 --- a/drivers/video/logo/Makefile +++ b/drivers/video/logo/Makefile @@ -18,23 +18,6 @@ obj-$(CONFIG_SPU_BASE) += logo_spe_clut224.o # How to generate logo's -# Use logo-cfiles to retrieve list of .c files to be built -logo-cfiles = $(notdir $(patsubst %.$(2), %.c, \ - $(wildcard $(srctree)/$(src)/*$(1).$(2)))) - - -# Mono logos -extra-y += $(call logo-cfiles,_mono,pbm) - -# VGA16 logos -extra-y += $(call logo-cfiles,_vga16,ppm) - -# 224 Logos -extra-y += $(call logo-cfiles,_clut224,ppm) - -# Gray 256 -extra-y += $(call logo-cfiles,_gray256,pgm) - pnmtologo := scripts/pnmtologo # Create commands like "pnmtologo -t mono -n logo_mac_mono -o ..." @@ -55,5 +38,5 @@ $(obj)/%_clut224.c: $(src)/%_clut224.ppm $(pnmtologo) FORCE $(obj)/%_gray256.c: $(src)/%_gray256.pgm $(pnmtologo) FORCE $(call if_changed,logo) -# Files generated that shall be removed upon make clean -clean-files := *.o *_mono.c *_vga16.c *_clut224.c *_gray256.c +# generated C files +targets += *_mono.c *_vga16.c *_clut224.c *_gray256.c diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index 75fd140b02ff..43c391626a00 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -220,6 +220,8 @@ static int hgcm_call_preprocess_linaddr( if (!bounce_buf) return -ENOMEM; + *bounce_buf_ret = bounce_buf; + if (copy_in) { ret = copy_from_user(bounce_buf, (void __user *)buf, len); if (ret) @@ -228,7 +230,6 @@ static int hgcm_call_preprocess_linaddr( memset(bounce_buf, 0, len); } - *bounce_buf_ret = bounce_buf; hgcm_call_add_pagelist_size(bounce_buf, len, extra); return 0; } diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig index ebed495b9e69..b7847636501d 100644 --- a/drivers/w1/slaves/Kconfig +++ b/drivers/w1/slaves/Kconfig @@ -103,6 +103,7 @@ config W1_SLAVE_DS2438 config W1_SLAVE_DS250X tristate "512b/1kb/16kb EPROM family support" + select CRC16 help Say Y here if you want to use a 1-wire 512b/1kb/16kb EPROM family device (DS250x). diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 4e11de6cde81..5bae515c8e25 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -156,8 +156,10 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC) /* balloon_append: add the given page to the balloon. */ -static void __balloon_append(struct page *page) +static void balloon_append(struct page *page) { + __SetPageOffline(page); + /* Lowmem is re-populated first, so highmem pages go at list tail. 
 	if (PageHighMem(page)) {
 		list_add_tail(&page->lru, &ballooned_pages);
@@ -169,11 +171,6 @@ static void __balloon_append(struct page *page)
 	wake_up(&balloon_wq);
 }

-static void balloon_append(struct page *page)
-{
-	__balloon_append(page);
-}
-
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
 static struct page *balloon_retrieve(bool require_lowmem)
 {
@@ -192,6 +189,7 @@ static struct page *balloon_retrieve(bool require_lowmem)
 	else
 		balloon_stats.balloon_low--;

+	__ClearPageOffline(page);
 	return page;
 }
@@ -377,8 +375,7 @@ static void xen_online_page(struct page *page, unsigned int order)
 	for (i = 0; i < size; i++) {
 		p = pfn_to_page(start_pfn + i);
 		__online_page_set_limits(p);
-		__SetPageOffline(p);
-		__balloon_append(p);
+		balloon_append(p);
 	}
 	mutex_unlock(&balloon_mutex);
 }
@@ -444,7 +441,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);

 		/* Relinquish the page back to the allocator. */
-		__ClearPageOffline(page);
 		free_reserved_page(page);
 	}
@@ -471,7 +467,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 			state = BP_EAGAIN;
 			break;
 		}
-		__SetPageOffline(page);
 		adjust_managed_page_count(page, -1);
 		xenmem_reservation_scrub_page(page);
 		list_add(&page->lru, &pages);
@@ -611,7 +606,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
 	while (pgno < nr_pages) {
 		page = balloon_retrieve(true);
 		if (page) {
-			__ClearPageOffline(page);
 			pages[pgno++] = page;
 #ifdef CONFIG_XEN_HAVE_PVMMU
 			/*
@@ -653,10 +647,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
 	mutex_lock(&balloon_mutex);

 	for (i = 0; i < nr_pages; i++) {
-		if (pages[i]) {
-			__SetPageOffline(pages[i]);
+		if (pages[i])
 			balloon_append(pages[i]);
-		}
 	}

 	balloon_stats.target_unpopulated -= nr_pages;
@@ -674,7 +666,6 @@ static void __init balloon_add_region(unsigned long start_pfn,
 				      unsigned long pages)
 {
 	unsigned long pfn, extra_pfn_end;
-	struct page *page;

 	/*
 	 * If the amount of usable memory has been limited (e.g., with
@@ -684,11 +675,10 @@ static void __init balloon_add_region(unsigned long start_pfn,
 	extra_pfn_end = min(max_pfn, start_pfn + pages);

 	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
-		page = pfn_to_page(pfn);
 		/* totalram_pages and totalhigh_pages do not
 		   include the boot-time balloon extension, so
 		   don't subtract from it. */
-		__balloon_append(page);
+		balloon_append(pfn_to_page(pfn));
 	}

 	balloon_stats.total_pages += extra_pfn_end - start_pfn;
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index 89d60f8e3c18..d1ff2186ebb4 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -40,7 +40,7 @@

 #define efi_data(op)	(op.u.efi_runtime_call)

-efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	struct xen_platform_op op = INIT_EFI_OP(get_time);
@@ -61,9 +61,8 @@ efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_time);

-efi_status_t xen_efi_set_time(efi_time_t *tm)
+static efi_status_t xen_efi_set_time(efi_time_t *tm)
 {
 	struct xen_platform_op op = INIT_EFI_OP(set_time);
@@ -75,10 +74,10 @@ efi_status_t xen_efi_set_time(efi_time_t *tm)

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_time);

-efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
-				     efi_time_t *tm)
+static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled,
+					    efi_bool_t *pending,
+					    efi_time_t *tm)
 {
 	struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time);
@@ -98,9 +97,8 @@ efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time);

-efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 {
 	struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time);
@@ -117,11 +115,10 @@ efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time);

-efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
-				  u32 *attr, unsigned long *data_size,
-				  void *data)
+static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
+					 u32 *attr, unsigned long *data_size,
+					 void *data)
 {
 	struct xen_platform_op op = INIT_EFI_OP(get_variable);
@@ -141,11 +138,10 @@ efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_variable);

-efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
-				       efi_char16_t *name,
-				       efi_guid_t *vendor)
+static efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
+					      efi_char16_t *name,
+					      efi_guid_t *vendor)
 {
 	struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name);
@@ -165,11 +161,10 @@ efi_status_t xen_efi_get_next_variable(unsigned long *name_size,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_next_variable);

-efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
-				  u32 attr, unsigned long data_size,
-				  void *data)
+static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+					 u32 attr, unsigned long data_size,
+					 void *data)
 {
 	struct xen_platform_op op = INIT_EFI_OP(set_variable);
@@ -186,11 +181,10 @@ efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_set_variable);

-efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
-					 u64 *remaining_space,
-					 u64 *max_variable_size)
+static efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
+						u64 *remaining_space,
+						u64 *max_variable_size)
 {
 	struct xen_platform_op op = INIT_EFI_OP(query_variable_info);
@@ -208,9 +202,8 @@ efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_query_variable_info);

-efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
+static efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
 {
 	struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count);
@@ -221,10 +214,9 @@ efi_status_t xen_efi_get_next_high_mono_count(u32 *count)

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count);

-efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
-				    unsigned long count, unsigned long sg_list)
+static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
+					   unsigned long count, unsigned long sg_list)
 {
 	struct xen_platform_op op = INIT_EFI_OP(update_capsule);
@@ -241,11 +233,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_update_capsule);

-efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
-					unsigned long count, u64 *max_size,
-					int *reset_type)
+static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
+					       unsigned long count, u64 *max_size, int *reset_type)
 {
 	struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities);
@@ -264,10 +254,9 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,

 	return efi_data(op).status;
 }
-EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps);

-void xen_efi_reset_system(int reset_type, efi_status_t status,
-			  unsigned long data_size, efi_char16_t *data)
+static void xen_efi_reset_system(int reset_type, efi_status_t status,
+				 unsigned long data_size, efi_char16_t *data)
 {
 	switch (reset_type) {
 	case EFI_RESET_COLD:
@@ -281,4 +270,25 @@ void xen_efi_reset_system(int reset_type, efi_status_t status,
 		BUG();
 	}
 }
-EXPORT_SYMBOL_GPL(xen_efi_reset_system);
+
+/*
+ * Set XEN EFI runtime services function pointers. Other fields of struct efi,
+ * e.g. efi.systab, will be set like normal EFI.
+ */
+void __init xen_efi_runtime_setup(void)
+{
+	efi.get_time = xen_efi_get_time;
+	efi.set_time = xen_efi_set_time;
+	efi.get_wakeup_time = xen_efi_get_wakeup_time;
+	efi.set_wakeup_time = xen_efi_set_wakeup_time;
+	efi.get_variable = xen_efi_get_variable;
+	efi.get_next_variable = xen_efi_get_next_variable;
+	efi.set_variable = xen_efi_set_variable;
+	efi.set_variable_nonblocking = xen_efi_set_variable;
+	efi.query_variable_info = xen_efi_query_variable_info;
+	efi.query_variable_info_nonblocking = xen_efi_query_variable_info;
+	efi.update_capsule = xen_efi_update_capsule;
+	efi.query_capsule_caps = xen_efi_query_capsule_caps;
+	efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+	efi.reset_system = xen_efi_reset_system;
+}
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a446a7221e13..81401f386c9c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -22,6 +22,7 @@

 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

+#include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -34,9 +35,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/refcount.h>
-#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
-#include <linux/of_device.h>
-#endif

 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -625,14 +623,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	flip->private_data = priv;
 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
 	priv->dma_dev = gntdev_miscdev.this_device;
-
-	/*
-	 * The device is not spawn from a device tree, so arch_setup_dma_ops
-	 * is not called, thus leaving the device with dummy DMA ops.
-	 * Fix this by calling of_dma_configure() with a NULL node to set
-	 * default DMA ops.
-	 */
-	of_dma_configure(priv->dma_dev, NULL, true);
+	dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
 #endif

 	pr_debug("priv %p\n", priv);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7ea6fb6a2e5d..49b381e104ef 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1363,8 +1363,7 @@ static int gnttab_setup(void)
 	if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
 		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
 		if (gnttab_shared.addr == NULL) {
-			pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
-				(unsigned long)xen_auto_xlat_grant_frames.vaddr);
+			pr_warn("gnttab share frames is not mapped!\n");
 			return -ENOMEM;
 		}
 	}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 69a626b0e594..c57c71b7d53d 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -775,7 +775,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev,
 	mappass->reqcopy = *req;
 	icsk = inet_csk(mappass->sock->sk);
 	queue = &icsk->icsk_accept_queue;
-	data = queue->rskq_accept_head != NULL;
+	data = READ_ONCE(queue->rskq_accept_head) != NULL;

 	if (data) {
 		mappass->reqcopy.cmd = 0;
 		ret = 0;
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 08adc590f631..597af455a522 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,6 +55,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/miscdevice.h>
+#include <linux/workqueue.h>

 #include <xen/xenbus.h>
 #include <xen/xen.h>
@@ -116,6 +117,8 @@ struct xenbus_file_priv {
 	wait_queue_head_t read_waitq;

 	struct kref kref;
+
+	struct work_struct wq;
 };

 /* Read out any raw xenbus messages queued up. */
@@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch,
 	mutex_unlock(&adap->dev_data->reply_mutex);
 }

-static void xenbus_file_free(struct kref *kref)
+static void xenbus_worker(struct work_struct *wq)
 {
 	struct xenbus_file_priv *u;
 	struct xenbus_transaction_holder *trans, *tmp;
 	struct watch_adapter *watch, *tmp_watch;
 	struct read_buffer *rb, *tmp_rb;

-	u = container_of(kref, struct xenbus_file_priv, kref);
+	u = container_of(wq, struct xenbus_file_priv, wq);

 	/*
 	 * No need for locking here because there are no other users,
@@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref)
 	kfree(u);
 }

+static void xenbus_file_free(struct kref *kref)
+{
+	struct xenbus_file_priv *u;
+
+	/*
+	 * We might be called in xenbus_thread().
+	 * Use workqueue to avoid deadlock.
+	 */
+	u = container_of(kref, struct xenbus_file_priv, kref);
+	schedule_work(&u->wq);
+}
+
 static struct xenbus_transaction_holder *xenbus_get_transaction(
 	struct xenbus_file_priv *u, uint32_t tx_id)
 {
@@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
 	INIT_LIST_HEAD(&u->watches);
 	INIT_LIST_HEAD(&u->read_buffers);
 	init_waitqueue_head(&u->read_waitq);
+	INIT_WORK(&u->wq, xenbus_worker);

 	mutex_init(&u->reply_mutex);
 	mutex_init(&u->msgbuffer_mutex);
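
The xenbus_dev_frontend.c hunk above moves the real teardown out of the kref release callback and into a work item, because the final kref_put() may run on the xenbus thread itself, where freeing the private data (and ending its transactions) could deadlock. The following is a minimal, hypothetical sketch of that deferred-release pattern; the foo_* names are invented for illustration and are not part of the patch.

/*
 * Sketch only: defer expensive or context-sensitive cleanup from a kref
 * release callback to the system workqueue. foo_priv/foo_put() are
 * illustrative names, not taken from the diff above.
 */
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo_priv {
	struct kref kref;
	struct work_struct wq;
	/* resources that must not be torn down in the caller's context */
};

/* Runs in process context from the system workqueue; safe to sleep here. */
static void foo_worker(struct work_struct *work)
{
	struct foo_priv *p = container_of(work, struct foo_priv, wq);

	/* heavyweight cleanup goes here */
	kfree(p);
}

/*
 * kref release callback: may be invoked from a context that cannot block
 * on the resources being released, so only schedule the real work.
 */
static void foo_release(struct kref *kref)
{
	struct foo_priv *p = container_of(kref, struct foo_priv, kref);

	schedule_work(&p->wq);
}

static struct foo_priv *foo_alloc(void)
{
	struct foo_priv *p = kzalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;
	kref_init(&p->kref);
	INIT_WORK(&p->wq, foo_worker);
	return p;
}

static void foo_put(struct foo_priv *p)
{
	kref_put(&p->kref, foo_release);
}

The trade-off is that the object can outlive the last foo_put() by a short window until the worker runs, so callers must not assume the memory is gone immediately; the same applies to the xenbus private data freed from xenbus_worker() in the patch above.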