134 files changed, 2497 insertions, 948 deletions
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index 29ca5f5feb35..57047dcb8d19 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -70,6 +70,15 @@ Interrupt Handling .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c :internal: +IP Blocks +------------------ + +.. kernel-doc:: drivers/gpu/drm/amd/include/amd_shared.h + :doc: IP Blocks + +.. kernel-doc:: drivers/gpu/drm/amd/include/amd_shared.h + :identifiers: amd_ip_block_type amd_ip_funcs + AMDGPU XGMI Support =================== diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6125ba905faf..87f095dc385c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -104,6 +104,7 @@ #include "amdgpu_mes.h" #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" +#include "amdgpu_gfxhub.h" #include "amdgpu_df.h" #define MAX_GPU_INSTANCE 16 @@ -881,6 +882,9 @@ struct amdgpu_device { /* mmhub */ struct amdgpu_mmhub mmhub; + /* gfxhub */ + struct amdgpu_gfxhub gfxhub; + /* gfx */ struct amdgpu_gfx gfx; @@ -1016,18 +1020,32 @@ int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, uint32_t *buf, size_t size, bool write); -uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags); +void amdgpu_device_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, uint32_t acc_flags); -void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags); -void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags); +void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, + uint32_t reg, uint32_t v); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); +u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr); +u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr); +void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr, u32 reg_data); +void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr, u64 reg_data); + bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); @@ -1038,8 +1056,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); */ #define AMDGPU_REGS_NO_KIQ (1<<1) -#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) -#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) #define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) #define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) @@ -1047,9 +1065,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) -#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) -#define DREG32(reg) printk(KERN_INFO 
"REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) -#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) +#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_device_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1095,7 +1113,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); WREG32_SMC(_Reg, tmp); \ } while (0) -#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_mm_rreg((adev), (reg), false)) +#define DREG32_SYS(sqf, adev, reg) seq_printf((sqf), #reg " : 0x%08X\n", amdgpu_device_rreg((adev), (reg), false)) #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4a93b880c6bf..165b02e267b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -806,8 +806,8 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } adev->atif = atif; - if (atif->notifications.brightness_change) { #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + if (atif->notifications.brightness_change) { if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) struct amdgpu_display_manager *dm = &adev->dm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index edff1b7f282a..0544460653b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -36,6 +36,8 @@ */ uint64_t amdgpu_amdkfd_total_mem_size; +static bool kfd_initialized; + int amdgpu_amdkfd_init(void) { struct sysinfo si; @@ -51,19 +53,26 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + kfd_initialized = !ret; return ret; } void amdgpu_amdkfd_fini(void) { - kgd2kfd_exit(); + if (kfd_initialized) { + kgd2kfd_exit(); + kfd_initialized = false; + } } void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { bool vf = amdgpu_sriov_vf(adev); + if (!kfd_initialized) + return; + adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, adev->pdev, adev->asic_type, vf); @@ -572,6 +581,13 @@ uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) return adev->rev_id; } +int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gmc.noretry; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a10507ecb750..bc9f0e42e0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -181,6 +181,7 @@ uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); +int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); /* Read user wptr from a specified user address space 
with page fault diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index df0aab0fc67e..1529815838f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -32,7 +32,6 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" -#include "gfxhub_v2_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -753,7 +752,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, } /* SDMA is on gfxhub as well for Navi1* series */ - gfxhub_v2_0_setup_vm_pt_regs(adev, vmid, page_table_base); + adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } const struct kfd2kgd_calls gfx_v10_kfd2kgd = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 5b38f848b772..50016bf9c427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -31,7 +31,6 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" -#include "gfxhub_v2_1.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -657,7 +656,7 @@ static void set_vm_context_page_table_base_v10_3(struct kgd_dev *kgd, uint32_t v struct amdgpu_device *adev = get_amdgpu_device(kgd); /* SDMA is on gfxhub as well for Navi1* series */ - gfxhub_v2_1_setup_vm_pt_regs(adev, vmid, page_table_base); + adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } #if 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e6aede725197..e0d5110701bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -36,9 +36,7 @@ #include "v9_structs.h" #include "soc15.h" #include "soc15d.h" -#include "mmhub_v1_0.h" -#include "gfxhub_v1_0.h" - +#include "gfx_v9_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -703,7 +701,180 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); - gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); +} + +static void lock_spi_csq_mutexes(struct amdgpu_device *adev) +{ + mutex_lock(&adev->srbm_mutex); + mutex_lock(&adev->grbm_idx_mutex); + +} + +static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) +{ + mutex_unlock(&adev->grbm_idx_mutex); + mutex_unlock(&adev->srbm_mutex); +} + +/** + * @get_wave_count: Read device registers to get number of waves in flight for + * a particular queue. The method also returns the VMID associated with the + * queue. + * + * @adev: Handle of device whose registers are to be read + * @queue_idx: Index of queue in the queue-map bit-field + * @wave_cnt: Output parameter updated with number of waves in flight + * @vmid: Output parameter updated with VMID of queue whose wave count + * is being collected + */ +static void get_wave_count(struct amdgpu_device *adev, int queue_idx, + int *wave_cnt, int *vmid) +{ + int pipe_idx; + int queue_slot; + unsigned int reg_val; + + /* + * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID + * parameters to read out waves in flight. Get VMID if there are + * non-zero waves in flight. 
+ */ + *vmid = 0xFF; + *wave_cnt = 0; + pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; + queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); + reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + + queue_slot); + *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (*wave_cnt != 0) + *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) & + CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; +} + +/** + * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each + * shader engine and aggregates the number of waves that are in flight for the + * process whose pasid is provided as a parameter. The process could have ZERO + * or more queues running and submitting waves to compute units. + * + * @kgd: Handle of device from which to get number of waves in flight + * @pasid: Identifies the process for which this query call is invoked + * @wave_cnt: Output parameter updated with number of waves in flight that + * belong to process with given pasid + * @max_waves_per_cu: Output parameter updated with maximum number of waves + * possible per Compute Unit + * + * @note: It's possible that the device has too many queues (oversubscription) + * in which case a VMID could be remapped to a different PASID. This could lead + * to an inaccurate wave count. Following is a high-level sequence: + * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 + * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2 + * In the sequence above, the wave count obtained from time T1 will be incorrectly + * lost or added to the total wave count. + * + * The registers that provide the waves in flight are: + * + * SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a + * queue is slotted, OFF if there is no queue. A process could have ZERO or + * more queues slotted and submitting waves to be run on compute units. Even + * when there is a queue it is possible there could be zero wave fronts; this + * can happen when the queue is waiting on top-of-pipe events - e.g. a waitRegMem + * command + * + * For each bit that is ON from above: + * + * Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the + * number of waves that are in flight for the queue at specified index. The + * index ranges from 0 to 7. + * + * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID + * of the wave(s). + * + * Determine if VMID from above step maps to pasid provided as parameter. If + * it matches, aggregate the wave count. That the VMID will not match pasid is + * a normal condition i.e. a device is expected to support multiple queues + * from multiple processes.
+ * + * Reading registers referenced above involves programming GRBM appropriately + */ +static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, + int *pasid_wave_cnt, int *max_waves_per_cu) +{ + int qidx; + int vmid; + int se_idx; + int sh_idx; + int se_cnt; + int sh_cnt; + int wave_cnt; + int queue_map; + int pasid_tmp; + int max_queue_cnt; + int vmid_wave_cnt = 0; + struct amdgpu_device *adev; + DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); + + adev = get_amdgpu_device(kgd); + lock_spi_csq_mutexes(adev); + soc15_grbm_select(adev, 1, 0, 0, 0); + + /* + * Iterate through the shader engines and arrays of the device + * to get number of waves in flight + */ + bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + sh_cnt = adev->gfx.config.max_sh_per_se; + se_cnt = adev->gfx.config.max_shader_engines; + for (se_idx = 0; se_idx < se_cnt; se_idx++) { + for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + + gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, + mmSPI_CSQ_WF_ACTIVE_STATUS)); + + /* + * Assumption: queue map encodes the following schema: four + * pipes per micro-engine, with each pipe mapping + * eight queues. This schema is true for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + + /* Skip queues that are not associated with + * compute functions + */ + if (!test_bit(qidx, cp_queue_bitmap)) + continue; + + if (!(queue_map & (1 << qidx))) + continue; + + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &wave_cnt, &vmid); + if (wave_cnt != 0) { + pasid_tmp = + RREG32(SOC15_REG_OFFSET(OSSSYS, 0, + mmIH_VMID_0_LUT) + vmid); + if (pasid_tmp == pasid) + vmid_wave_cnt += wave_cnt; + } + } + } + } + + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + soc15_grbm_select(adev, 0, 0, 0, 0); + unlock_spi_csq_mutexes(adev); + + /* Update the output parameters and return */ + *pasid_wave_cnt = vmid_wave_cnt; + *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * + adev->gfx.cu_info.max_waves_per_simd; } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { @@ -726,4 +897,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 17c010d0431f..b4df6460e45a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -543,6 +543,7 @@ int amdgpu_mem_train_support(struct amdgpu_device *adev) case HW_REV(11, 0, 0): case HW_REV(11, 0, 5): case HW_REV(11, 0, 7): + case HW_REV(11, 0, 11): ret = 1; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index abe0c2729e1c..2d125b8b15ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -267,7 +267,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, } else { r = get_user(value, (uint32_t *)buf); if (!r) - amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0); + amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value); } if (r) { result = r;
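[Editor's illustration, not part of the patch: a minimal caller sketch showing how a consumer such as KFD could use the get_cu_occupancy hook registered above. The kfd_dev fields used here (kgd, kfd2kgd, cu_cnt) are assumptions for illustration, not taken from this series.]

static int example_report_cu_occupancy(struct kfd_dev *dev, int pasid)
{
	int wave_cnt = 0;
	int max_waves_per_cu = 0;

	if (!dev->kfd2kgd->get_cu_occupancy)
		return -EOPNOTSUPP;

	/* Walks every SE/SA and sums in-flight waves whose VMID maps to @pasid */
	dev->kfd2kgd->get_cu_occupancy(dev->kgd, pasid,
				       &wave_cnt, &max_waves_per_cu);

	/* Report occupancy as a percentage of the theoretical peak;
	 * dev->cu_cnt (number of compute units) is an assumed field.
	 */
	return wave_cnt * 100 / (max_waves_per_cu * dev->cu_cnt);
}

diff --git 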
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2ff43a3d52fc..e8b41756c9f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,8 +80,6 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -303,10 +301,10 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, } /* - * MMIO register access helper functions. + * register access helper functions. */ /** - * amdgpu_mm_rreg - read a memory mapped IO register + * amdgpu_device_rreg - read a memory mapped IO or indirect register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -314,33 +312,29 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, * * Returns the 32 bit value from the offset specified. */ -uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - uint32_t acc_flags) +uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags) { uint32_t ret; if (adev->in_pci_err_recovery) return 0; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { - ret = amdgpu_kiq_rreg(adev, reg); - up_read(&adev->reset_sem); - return ret; + if ((reg * 4) < adev->rmmio_size) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_sem)) { + ret = amdgpu_kiq_rreg(adev, reg); + up_read(&adev->reset_sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); } - if ((reg * 4) < adev->rmmio_size) - ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); - else { - unsigned long flags; + trace_amdgpu_device_rreg(adev->pdev->device, reg, ret); - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); - ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - } - - trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret); return ret; } @@ -394,29 +388,8 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) BUG(); } -static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, - uint32_t reg, uint32_t v, - uint32_t acc_flags) -{ - if (adev->in_pci_err_recovery) - return; - - trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); - - if ((reg * 4) < adev->rmmio_size) - writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); - else { - unsigned long flags; - - spin_lock_irqsave(&adev->mmio_idx_lock, flags); - writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4)); - writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); - } -} - /** - * amdgpu_mm_wreg - write to a memory mapped IO register + * amdgpu_device_wreg - write to a memory mapped IO or indirect register * * @adev: amdgpu_device pointer * @reg: dword aligned register offset @@ -425,20 +398,27 @@ static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, * * Writes the value specified to the offset specified. 
*/ -void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_device_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags) { if (adev->in_pci_err_recovery) return; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { - amdgpu_kiq_wreg(adev, reg, v); - up_read(&adev->reset_sem); - return; + if ((reg * 4) < adev->rmmio_size) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_sem)) { + amdgpu_kiq_wreg(adev, reg, v); + up_read(&adev->reset_sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + adev->pcie_wreg(adev, reg * 4, v); } - amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); + trace_amdgpu_device_wreg(adev->pdev->device, reg, v); } /* @@ -446,21 +426,20 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, * * this function is invoked only the debugfs register access * */ -void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - uint32_t acc_flags) +void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, + uint32_t reg, uint32_t v) { if (adev->in_pci_err_recovery) return; if (amdgpu_sriov_fullaccess(adev) && - adev->gfx.rlc.funcs && - adev->gfx.rlc.funcs->is_rlcg_access_range) { - + adev->gfx.rlc.funcs && + adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } - - amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags); } /** @@ -595,6 +574,135 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) } /** + * amdgpu_device_indirect_rreg - read an indirect register + * + * @adev: amdgpu_device pointer + * @pcie_index: mmio register offset + * @pcie_data: mmio register offset + * + * Returns the value of indirect register @reg_addr + */ +u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr) +{ + unsigned long flags; + u32 r; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + r = readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return r; +} + +/** + * amdgpu_device_indirect_rreg64 - read a 64bits indirect register + * + * @adev: amdgpu_device pointer + * @pcie_index: mmio register offset + * @pcie_data: mmio register offset + * + * Returns the value of indirect register @reg_addr + */ +u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr) +{ + unsigned long flags; + u64 r; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + /* read low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + r = readl(pcie_data_offset); + /* read high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + r |= ((u64)readl(pcie_data_offset) << 32); + 
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return r; +} + +/** + * amdgpu_device_indirect_wreg - write an indirect register address + * + * @adev: amdgpu_device pointer + * @pcie_index: mmio register offset + * @pcie_data: mmio register offset + * @reg_addr: indirect register offset + * @reg_data: indirect register data + * + */ +void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr, u32 reg_data) +{ + unsigned long flags; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + writel(reg_data, pcie_data_offset); + readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/** + * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address + * + * @adev: amdgpu_device pointer + * @pcie_index: mmio register offset + * @pcie_data: mmio register offset + * @reg_addr: indirect register offset + * @reg_data: indirect register data + * + */ +void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, + u32 pcie_index, u32 pcie_data, + u32 reg_addr, u64 reg_data) +{ + unsigned long flags; + void __iomem *pcie_index_offset; + void __iomem *pcie_data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + + /* write low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); + readl(pcie_data_offset); + /* write high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + writel((u32)(reg_data >> 32), pcie_data_offset); + readl(pcie_data_offset); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/** * amdgpu_invalid_rreg - dummy reg read function * * @adev: amdgpu device pointer @@ -1262,11 +1370,15 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_gmc_tmz_set(adev); - if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { + if (amdgpu_num_kcq == -1) { + amdgpu_num_kcq = 8; + } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { amdgpu_num_kcq = 8; dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); } + amdgpu_gmc_noretry_set(adev); + return 0; } @@ -1669,6 +1781,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_VEGA20: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: return 0; case CHIP_VEGA10: @@ -1700,12 +1814,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; - case CHIP_SIENNA_CICHLID: - chip_name = "sienna_cichlid"; - break; - case CHIP_NAVY_FLOUNDER: - chip_name = "navy_flounder"; - break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -3445,8 +3553,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test * */ - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { amdgpu_virt_request_full_gpu(adev, false); + amdgpu_virt_fini_data_exchange(adev); + } /* disable all interrupts */ 
amdgpu_irq_disable_all(adev); @@ -4080,6 +4190,11 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_debugfs_wait_dump(adev); + if (amdgpu_sriov_vf(adev)) { + /* stop the data exchange thread */ + amdgpu_virt_fini_data_exchange(adev); + } + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c81206e6096f..7cc7af2a6822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, take the current one */ if (active && !adev->have_disp_power_ref) { adev->have_disp_power_ref = true; - goto out; + return ret; } /* if we have no active crtcs, then drop the power ref we got before */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 81e4cf869f50..c241317edee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -147,7 +147,7 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = 0; int amdgpu_discovery = -1; int amdgpu_mes = 0; -int amdgpu_noretry; +int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; int amdgpu_reset_method = -1; /* auto */ @@ -596,8 +596,13 @@ MODULE_PARM_DESC(mes, "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); module_param_named(mes, amdgpu_mes, int, 0444); +/** + * DOC: noretry (int) + * Disable retry faults in the GPU memory controller. + * (0 = retry enabled, 1 = retry disabled, -1 auto (default)) + */ MODULE_PARM_DESC(noretry, - "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); + "Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))"); module_param_named(noretry, amdgpu_noretry, int, 0644); /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index e811fecc540f..8f4a8f8d8146 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -34,18 +34,31 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) { - /* TODO: Gaming SKUs don't have the FRU EEPROM. - * Use this hack to address hangs on modprobe on gaming SKUs - * until a proper solution can be implemented by only supporting - * the explicit chip IDs for VG20 Server cards - * - * TODO: Add list of supported Arcturus DIDs once confirmed + /* Only server cards have the FRU EEPROM + * TODO: See if we can figure this out dynamically instead of + * having to parse VBIOS versions. */ - if ((adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a0) || - (adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a1) || - (adev->asic_type == CHIP_VEGA20 && adev->pdev->device == 0x66a4)) - return true; - return false; + struct atom_context *atom_ctx = adev->mode_info.atom_context; + + /* VBIOS is of the format ###-DXXXYY-##. For SKU identification, + * we can use just the "DXXX" portion. If there were more models, we + * could convert the 3 characters to a hex integer and use a switch + * for ease/speed/readability. 
For now, 2 string comparisons are + * reasonable and not too expensive + */ + switch (adev->asic_type) { + case CHIP_VEGA20: + /* D161 and D163 are the VG20 server SKUs */ + if (strnstr(atom_ctx->vbios_version, "D161", + sizeof(atom_ctx->vbios_version)) || + strnstr(atom_ctx->vbios_version, "D163", + sizeof(atom_ctx->vbios_version))) + return true; + else + return false; + default: + return false; + } } static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index f29a8611d69b..1308d976d60e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -26,4 +26,4 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev); -#endif // __AMDGPU_PRODINFO_H__ +#endif // __AMDGPU_FRU_EEPROM_H__ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a611e78dd4ba..258498cbf1eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -217,6 +217,7 @@ struct amdgpu_gfx_funcs { int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); void (*reset_ras_error_count) (struct amdgpu_device *adev); void (*init_spm_golden)(struct amdgpu_device *adev); + void (*query_ras_error_status) (struct amdgpu_device *adev); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h new file mode 100644 index 000000000000..66ebc2e3b2ad --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -0,0 +1,43 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_GFXHUB_H__ +#define __AMDGPU_GFXHUB_H__ + +struct amdgpu_gfxhub_funcs { + u64 (*get_fb_location)(struct amdgpu_device *adev); + u64 (*get_mc_fb_offset)(struct amdgpu_device *adev); + void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); + int (*gart_enable)(struct amdgpu_device *adev); + + void (*gart_disable)(struct amdgpu_device *adev); + void (*set_fault_enable_default)(struct amdgpu_device *adev, bool value); + void (*init)(struct amdgpu_device *adev); + int (*get_xgmi_info)(struct amdgpu_device *adev); +}; + +struct amdgpu_gfxhub { + const struct amdgpu_gfxhub_funcs *funcs; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 213ef090bb0e..36604d751d62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -413,6 +413,44 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) } } +/** + * amdgpu_gmc_noretry_set -- set per asic noretry defaults + * @adev: amdgpu_device pointer + * + * Set a per asic default for the no-retry parameter. + * + */ +void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + + switch (adev->asic_type) { + case CHIP_RAVEN: + /* Raven currently has issues with noretry; + * regardless of what we decide for other + * asics, we should leave raven with + * noretry = 0 until we root cause the + * issues. + */ + if (amdgpu_noretry == -1) + gmc->noretry = 0; + else + gmc->noretry = amdgpu_noretry; + break; + default: + /* default this to 0 for now, but we may want + * to change this in the future for certain + * GPUs as it can increase performance in + * certain cases. + */ + if (amdgpu_noretry == -1) + gmc->noretry = 0; + else + gmc->noretry = amdgpu_noretry; + break; + } +} + void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, bool enable) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index d61bbde4c7d2..aa0c83776ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -239,6 +239,7 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; + int noretry; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) @@ -300,6 +301,7 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); +extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); extern void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bccaf4f77647..a5aaff15f7ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -177,7 +177,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) break; case CHIP_VEGA10: /* turn runpm on if noretry=0 */ - if (!amdgpu_noretry) + if (!adev->gmc.noretry) adev->runpm = true; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 0c43d7fe893c..1ae9bdae7311 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -40,6 +40,7 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void
(*update_power_gating)(struct amdgpu_device *adev, bool enable); + void (*query_ras_error_status)(struct amdgpu_device *adev); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2c66e20b2ed9..18be544d8c1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -161,10 +161,12 @@ static int psp_sw_init(void *handle) struct psp_context *psp = &adev->psp; int ret; - ret = psp_init_microcode(psp); - if (ret) { - DRM_ERROR("Failed to load psp firmware!\n"); - return ret; + if (!amdgpu_sriov_vf(adev)) { + ret = psp_init_microcode(psp); + if (ret) { + DRM_ERROR("Failed to load psp firmware!\n"); + return ret; + } } ret = psp_memory_training_init(psp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e5ea14774c0c..8bf6a7c056bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1027,58 +1027,6 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); } -static void amdgpu_ras_sysfs_add_bad_page_node(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct attribute_group group; - struct bin_attribute *bin_attrs[] = { - &con->badpages_attr, - NULL, - }; - - con->badpages_attr = (struct bin_attribute) { - .attr = { - .name = "gpu_vram_bad_pages", - .mode = S_IRUGO, - }, - .size = 0, - .private = NULL, - .read = amdgpu_ras_sysfs_badpages_read, - }; - - group.name = RAS_FS_NAME; - group.bin_attrs = bin_attrs; - - sysfs_bin_attr_init(bin_attrs[0]); - - sysfs_update_group(&adev->dev->kobj, &group); -} - -static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct attribute *attrs[] = { - &con->features_attr.attr, - NULL - }; - struct attribute_group group = { - .name = RAS_FS_NAME, - .attrs = attrs, - }; - - con->features_attr = (struct device_attribute) { - .attr = { - .name = "features", - .mode = S_IRUGO, - }, - .show = amdgpu_ras_sysfs_features_read, - }; - - sysfs_attr_init(attrs[0]); - - return sysfs_create_group(&adev->dev->kobj, &group); -} - static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1300,13 +1248,43 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) /* debugfs end */ /* ras fs */ - +static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, + amdgpu_ras_sysfs_badpages_read, NULL, 0); +static DEVICE_ATTR(features, S_IRUGO, + amdgpu_ras_sysfs_features_read, NULL); static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { - amdgpu_ras_sysfs_create_feature_node(adev); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct attribute_group group = { + .name = RAS_FS_NAME, + }; + struct attribute *attrs[] = { + &con->features_attr.attr, + NULL + }; + struct bin_attribute *bin_attrs[] = { + NULL, + NULL, + }; + int r; - if (amdgpu_bad_page_threshold != 0) - amdgpu_ras_sysfs_add_bad_page_node(adev); + /* add features entry */ + con->features_attr = dev_attr_features; + group.attrs = attrs; + sysfs_attr_init(attrs[0]); + + if (amdgpu_bad_page_threshold != 0) { + /* add bad_page_features entry */ + bin_attr_gpu_vram_bad_pages.private = NULL; + con->badpages_attr = bin_attr_gpu_vram_bad_pages; + bin_attrs[0] = &con->badpages_attr; + group.bin_attrs = bin_attrs; + 
sysfs_bin_attr_init(bin_attrs[0]); + } + + r = sysfs_create_group(&adev->dev->kobj, &group); + if (r) + dev_err(adev->dev, "Failed to create RAS sysfs group!"); return 0; } @@ -1498,6 +1476,45 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) } } +/* Parse RdRspStatus and WrRspStatus */ +void amdgpu_ras_error_status_query(struct amdgpu_device *adev, + struct ras_query_if *info) +{ + /* + * Only two blocks need to query read/write + * RspStatus at the current state + */ + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->query_ras_error_status) + adev->gfx.funcs->query_ras_error_status(adev); + break; + case AMDGPU_RAS_BLOCK__MMHUB: + if (adev->mmhub.funcs->query_ras_error_status) + adev->mmhub.funcs->query_ras_error_status(adev); + break; + default: + break; + } +} + +static void amdgpu_ras_query_err_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_manager *obj; + + if (!con) + return; + + list_for_each_entry(obj, &con->head, node) { + struct ras_query_if info = { + .head = obj->head, + }; + + amdgpu_ras_error_status_query(adev, &info); + } +} + /* recovery begin */ /* return 0 on success. @@ -1568,8 +1585,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) } list_for_each_entry(remote_adev, - device_list_handle, gmc.xgmi.head) + device_list_handle, gmc.xgmi.head) { + amdgpu_ras_query_err_status(remote_adev); amdgpu_ras_log_on_err_counter(remote_adev); + } amdgpu_put_xgmi_hive(hive); } @@ -1967,8 +1986,7 @@ static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev) { if (adev->asic_type != CHIP_VEGA10 && adev->asic_type != CHIP_VEGA20 && - adev->asic_type != CHIP_ARCTURUS && - adev->asic_type != CHIP_SIENNA_CICHLID) + adev->asic_type != CHIP_ARCTURUS) return 1; else return 0; @@ -2012,6 +2030,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, *supported = amdgpu_ras_enable == 0 ? 
0 : *hw_supported & amdgpu_ras_mask; + adev->ras_features = *supported; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 63e734a125fb..ee9480d14cbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -35,7 +35,7 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) -TRACE_EVENT(amdgpu_mm_rreg, +TRACE_EVENT(amdgpu_device_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -54,7 +54,7 @@ TRACE_EVENT(amdgpu_mm_rreg, (unsigned long)__entry->value) ); -TRACE_EVENT(amdgpu_mm_wreg, +TRACE_EVENT(amdgpu_device_wreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), TP_STRUCT__entry( @@ -321,6 +321,49 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, TP_ARGS(mapping) ); +TRACE_EVENT(amdgpu_vm_update_ptes, + TP_PROTO(struct amdgpu_vm_update_params *p, + uint64_t start, uint64_t end, + unsigned int nptes, uint64_t dst, + uint64_t incr, uint64_t flags, + pid_t pid, uint64_t vm_ctx), + TP_ARGS(p, start, end, nptes, dst, incr, flags, pid, vm_ctx), + TP_STRUCT__entry( + __field(u64, start) + __field(u64, end) + __field(u64, flags) + __field(unsigned int, nptes) + __field(u64, incr) + __field(pid_t, pid) + __field(u64, vm_ctx) + __dynamic_array(u64, dst, nptes) + ), + + TP_fast_assign( + unsigned int i; + + __entry->start = start; + __entry->end = end; + __entry->flags = flags; + __entry->incr = incr; + __entry->nptes = nptes; + __entry->pid = pid; + __entry->vm_ctx = vm_ctx; + for (i = 0; i < nptes; ++i) { + u64 addr = p->pages_addr ? amdgpu_vm_map_gart( + p->pages_addr, dst) : dst; + + ((u64 *)__get_dynamic_array(dst))[i] = addr; + dst += incr; + } + ), + TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx," + " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid, + __entry->vm_ctx, __entry->start, __entry->end, + __entry->flags, __entry->incr, __print_array( + __get_dynamic_array(dst), __entry->nptes, 8)) +); + TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags, bool direct), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f76961d17246..d0aea5e39531 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -31,6 +31,12 @@ #include "soc15.h" #include "nv.h" +#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \ + do { \ + vf2pf_info->ucode_info[ucode].id = ucode; \ + vf2pf_info->ucode_info[ucode].version = ver; \ + } while (0) + bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { /* By now all MMIO pages except mailbox are blocked */ @@ -239,10 +245,10 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) } -int amdgpu_virt_fw_reserve_get_checksum(void *obj, - unsigned long obj_size, - unsigned int key, - unsigned int chksum) +unsigned int amd_sriov_msg_checksum(void *obj, + unsigned long obj_size, + unsigned int key, + unsigned int checksum) { unsigned int ret = key; unsigned long i = 0; @@ -252,9 +258,9 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, /* calculate checksum */ for (i = 0; i < obj_size; ++i) ret += *(pos + i); - /* minus the chksum itself */ - pos = (char *)&chksum; - for (i = 0; i < sizeof(chksum); ++i) + /* minus the checksum itself */ + pos = (char *)&checksum; + for (i = 0; i < sizeof(checksum); ++i) ret -= *(pos + i); return 
ret; } @@ -415,33 +421,188 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev, } } -void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) +static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) { - uint32_t pf2vf_size = 0; - uint32_t checksum = 0; + struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf; + uint32_t checksum; uint32_t checkval; - char *str; + + if (adev->virt.fw_reserve.p_pf2vf == NULL) + return -EINVAL; + + if (pf2vf_info->size > 1024) { + DRM_ERROR("invalid pf2vf message size\n"); + return -EINVAL; + } + + switch (pf2vf_info->version) { + case 1: + checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum; + checkval = amd_sriov_msg_checksum( + adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, + adev->virt.fw_reserve.checksum_key, checksum); + if (checksum != checkval) { + DRM_ERROR("invalid pf2vf message\n"); + return -EINVAL; + } + + adev->virt.gim_feature = + ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags; + break; + case 2: + /* TODO: missing key, need to add it later */ + checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum; + checkval = amd_sriov_msg_checksum( + adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, + 0, checksum); + if (checksum != checkval) { + DRM_ERROR("invalid pf2vf message\n"); + return -EINVAL; + } + + adev->virt.vf2pf_update_interval_ms = + ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms; + adev->virt.gim_feature = + ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all; + + break; + default: + DRM_ERROR("invalid pf2vf version\n"); + return -EINVAL; + } + + /* clamp an out-of-range interval value */ + if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000) + adev->virt.vf2pf_update_interval_ms = 2000; + + return 0; +} + +static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) +{ + struct amd_sriov_msg_vf2pf_info *vf2pf_info; + vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf; + + if (adev->virt.fw_reserve.p_vf2pf == NULL) + return; + + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS, adev->psp.ta_ras_ucode_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI, adev->psp.ta_xgmi_ucode_version); + 
POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version); + POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version); +} + +static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) +{ + struct amd_sriov_msg_vf2pf_info *vf2pf_info; + struct ttm_resource_manager *vram_man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + + vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf; + + if (adev->virt.fw_reserve.p_vf2pf == NULL) + return -EINVAL; + + memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info)); + + vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info); + vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER; + +#ifdef MODULE + if (THIS_MODULE->version != NULL) + strcpy(vf2pf_info->driver_version, THIS_MODULE->version); + else +#endif + strcpy(vf2pf_info->driver_version, "N/A"); + + vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all + vf2pf_info->driver_cert = 0; + vf2pf_info->os_info.all = 0; + + vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(vram_man) >> 20; + vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(vram_man) >> 20; + vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; + vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20; + + amdgpu_virt_populate_vf2pf_ucode_info(adev); + + /* TODO: read dynamic info */ + vf2pf_info->gfx_usage = 0; + vf2pf_info->compute_usage = 0; + vf2pf_info->encode_usage = 0; + vf2pf_info->decode_usage = 0; + + vf2pf_info->checksum = + amd_sriov_msg_checksum( + vf2pf_info, vf2pf_info->header.size, 0, 0); + + return 0; +} + +void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work); + + amdgpu_virt_read_pf2vf_data(adev); + amdgpu_virt_write_vf2pf_data(adev); + + schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); +} + +void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) +{ + if (adev->virt.vf2pf_update_interval_ms != 0) { + DRM_INFO("clean up the vf2pf work item\n"); + flush_delayed_work(&adev->virt.vf2pf_work); + cancel_delayed_work_sync(&adev->virt.vf2pf_work); + } +} + +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) +{ uint64_t bp_block_offset = 0; uint32_t bp_block_size = 0; - struct amdgim_pf2vf_info_v2 *pf2vf_v2 = NULL; + struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; + adev->virt.vf2pf_update_interval_ms = 0; if (adev->mman.fw_vram_usage_va != NULL) { + adev->virt.vf2pf_update_interval_ms = 2000; + adev->virt.fw_reserve.p_pf2vf = - (struct amd_sriov_msg_pf2vf_info_header *)( - adev->mman.fw_vram_usage_va + AMDGIM_DATAEXCHANGE_OFFSET); - AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); - AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); - AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &adev->virt.gim_feature); - - /* pf2vf message must be in 4K */ - if (pf2vf_size > 0 && pf2vf_size < 4096) { - if (adev->virt.fw_reserve.p_pf2vf->version == 2) { - pf2vf_v2 = (struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf; - bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_L & 
0xFFFFFFFF) | - ((((uint64_t)pf2vf_v2->bp_block_offset_H) << 32) & 0xFFFFFFFF00000000); + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + adev->virt.fw_reserve.p_vf2pf = + (struct amd_sriov_msg_vf2pf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); + + amdgpu_virt_read_pf2vf_data(adev); + amdgpu_virt_write_vf2pf_data(adev); + + /* bad page handling for version 2 */ + if (adev->virt.fw_reserve.p_pf2vf->version == 2) { + pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; + + bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | + ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); bp_block_size = pf2vf_v2->bp_block_size; if (bp_block_size && !adev->virt.ras_init_done) @@ -450,37 +611,11 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) if (adev->virt.ras_init_done) amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); } + } - checkval = amdgpu_virt_fw_reserve_get_checksum( - adev->virt.fw_reserve.p_pf2vf, pf2vf_size, - adev->virt.fw_reserve.checksum_key, checksum); - if (checkval == checksum) { - adev->virt.fw_reserve.p_vf2pf = - ((void *)adev->virt.fw_reserve.p_pf2vf + - pf2vf_size); - memset((void *)adev->virt.fw_reserve.p_vf2pf, 0, - sizeof(amdgim_vf2pf_info)); - AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version, - AMDGPU_FW_VRAM_VF2PF_VER); - AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size, - sizeof(amdgim_vf2pf_info)); - AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, - &str); -#ifdef MODULE - if (THIS_MODULE->version != NULL) - strcpy(str, THIS_MODULE->version); - else -#endif - strcpy(str, "N/A"); - AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, - 0); - AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum, - amdgpu_virt_fw_reserve_get_checksum( - adev->virt.fw_reserve.p_vf2pf, - pf2vf_size, - adev->virt.fw_reserve.checksum_key, 0)); - } - } + if (adev->virt.vf2pf_update_interval_ms != 0) { + INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item); + schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b2046c3a404d..8dd624c20f89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -24,6 +24,8 @@ #ifndef AMDGPU_VIRT_H #define AMDGPU_VIRT_H +#include "amdgv_sriovmsg.h" + #define AMDGPU_SRIOV_CAPS_SRIOV_VBIOS (1 << 0) /* vBIOS is sr-iov ready */ #define AMDGPU_SRIOV_CAPS_ENABLE_IOV (1 << 1) /* sr-iov is enabled on this GPU */ #define AMDGPU_SRIOV_CAPS_IS_VF (1 << 2) /* this GPU is a virtual function */ @@ -79,7 +81,10 @@ struct amdgpu_virt_fw_reserve { struct amd_sriov_msg_vf2pf_info_header *p_vf2pf; unsigned int checksum_key; }; + /* + * Legacy GIM header + * * Defination between PF and VF * Structures forcibly aligned to 4 to keep the same style as PF. */ @@ -101,15 +106,7 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_PP_ONE_VF = (1 << 4), }; -struct amd_sriov_msg_pf2vf_info_header { - /* the total structure size in byte. 
*/ - uint32_t size; - /* version of this structure, written by the GIM */ - uint32_t version; - /* reserved */ - uint32_t reserved[2]; -} __aligned(4); -struct amdgim_pf2vf_info_v1 { +struct amdgim_pf2vf_info_v1 { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; /* max_width * max_height */ @@ -128,54 +125,6 @@ struct amdgim_pf2vf_info_v1 { unsigned int checksum; } __aligned(4); -struct amdgim_pf2vf_info_v2 { - /* header contains size and version */ - struct amd_sriov_msg_pf2vf_info_header header; - /* use private key from mailbox 2 to create chueksum */ - uint32_t checksum; - /* The features flags of the GIM driver supports. */ - uint32_t feature_flags; - /* max_width * max_height */ - uint32_t uvd_enc_max_pixels_count; - /* 16x16 pixels/sec, codec independent */ - uint32_t uvd_enc_max_bandwidth; - /* max_width * max_height */ - uint32_t vce_enc_max_pixels_count; - /* 16x16 pixels/sec, codec independent */ - uint32_t vce_enc_max_bandwidth; - /* Bad pages block position in BYTE */ - uint32_t bp_block_offset_L; - uint32_t bp_block_offset_H; - /* Bad pages block size in BYTE */ - uint32_t bp_block_size; - /* MEC FW position in kb from the start of VF visible frame buffer */ - uint32_t mecfw_kboffset_L; - uint32_t mecfw_kboffset_H; - /* MEC FW size in KB */ - uint32_t mecfw_ksize; - /* UVD FW position in kb from the start of VF visible frame buffer */ - uint32_t uvdfw_kboffset_L; - uint32_t uvdfw_kboffset_H; - /* UVD FW size in KB */ - uint32_t uvdfw_ksize; - /* VCE FW position in kb from the start of VF visible frame buffer */ - uint32_t vcefw_kboffset_L; - uint32_t vcefw_kboffset_H; - /* VCE FW size in KB */ - uint32_t vcefw_ksize; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (18 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 0)]; -} __aligned(4); - - -struct amd_sriov_msg_vf2pf_info_header { - /* the total structure size in byte. 
*/ - uint32_t size; - /*version of this structure, written by the guest */ - uint32_t version; - /* reserved */ - uint32_t reserved[2]; -} __aligned(4); - struct amdgim_vf2pf_info_v1 { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; @@ -237,31 +186,6 @@ struct amdgim_vf2pf_info_v2 { uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)]; } __aligned(4); -#define AMDGPU_FW_VRAM_VF2PF_VER 2 -typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; - -#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \ - do { \ - ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \ - } while (0) - -#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \ - do { \ - (*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \ - } while (0) - -#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \ - do { \ - if (!adev->virt.fw_reserve.p_pf2vf) \ - *(val) = 0; \ - else { \ - if (adev->virt.fw_reserve.p_pf2vf->version == 1) \ - *(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \ - if (adev->virt.fw_reserve.p_pf2vf->version == 2) \ - *(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \ - } \ - } while (0) - struct amdgpu_virt_ras_err_handler_data { /* point to bad page records array */ struct eeprom_table_record *bps; @@ -285,7 +209,7 @@ struct amdgpu_virt { struct work_struct flr_work; struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; - struct amdgpu_vf_error_buffer vf_errors; + struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; uint32_t gim_feature; uint32_t reg_access_mode; @@ -293,6 +217,10 @@ struct amdgpu_virt { bool tdr_debug; struct amdgpu_virt_ras_err_handler_data *virt_eh_data; bool ras_init_done; + + /* vf2pf message */ + struct delayed_work vf2pf_work; + uint32_t vf2pf_update_interval_ms; }; #define amdgpu_sriov_enabled(adev) \ @@ -341,11 +269,9 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev); int amdgpu_virt_wait_reset(struct amdgpu_device *adev); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); -int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, - unsigned int key, - unsigned int chksum); void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); +void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); void amdgpu_detect_virtualization(struct amdgpu_device *adev); bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 420931d36732..2b65e83c808b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1502,6 +1502,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, pt = cursor.entry->base.bo; shift = parent_shift; + frag_end = max(frag_end, ALIGN(frag_start + 1, + 1ULL << shift)); } /* Looks good so far, calculate parameters for the update */ @@ -1513,19 +1515,26 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, entry_end = min(entry_end, end); do { + struct amdgpu_vm *vm = params->vm; uint64_t upd_end = min(entry_end, frag_end); unsigned nptes = (upd_end - frag_start) >> shift; + uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); /* This can 
happen when we set higher level PDs to * silent to stop fault floods. */ nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, + nptes, dst, incr, upd_flags, + vm->task_info.pid, + vm->immediate.fence_context); amdgpu_vm_update_flags(params, pt, cursor.level, pe_start, dst, nptes, incr, - flags | AMDGPU_PTE_FRAG(frag)); + upd_flags); pe_start += nptes * 8; - dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift; + dst += nptes * incr; frag_start = upd_end; if (frag_start >= frag_end) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 770025a5e500..7c46937c1c0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -98,7 +98,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) #define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) -/* How to programm VM fault handling */ +/* How to program VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h new file mode 100644 index 000000000000..5355827ed0ae --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -0,0 +1,276 @@ +/* + * Copyright 2018-2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGV_SRIOV_MSG__H_ +#define AMDGV_SRIOV_MSG__H_ + +/* unit in kilobytes */ +#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 + +/* + * layout + * 0 64KB 65KB 66KB + * | VBIOS | PF2VF | VF2PF | Bad Page | ... 
+ * | 64KB | 1KB | 1KB | + */ +#define AMD_SRIOV_MSG_SIZE_KB 1 +#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB +#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) +#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB) + +/* + * PF2VF history log: + * v1 defined in amdgim + * v2 current + * + * VF2PF history log: + * v1 defined in amdgim + * v2 defined in amdgim + * v3 current + */ +#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 +#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 + +#define AMD_SRIOV_MSG_RESERVE_UCODE 24 + +enum amd_sriov_ucode_engine_id { + AMD_SRIOV_UCODE_ID_VCE = 0, + AMD_SRIOV_UCODE_ID_UVD, + AMD_SRIOV_UCODE_ID_MC, + AMD_SRIOV_UCODE_ID_ME, + AMD_SRIOV_UCODE_ID_PFP, + AMD_SRIOV_UCODE_ID_CE, + AMD_SRIOV_UCODE_ID_RLC, + AMD_SRIOV_UCODE_ID_RLC_SRLC, + AMD_SRIOV_UCODE_ID_RLC_SRLG, + AMD_SRIOV_UCODE_ID_RLC_SRLS, + AMD_SRIOV_UCODE_ID_MEC, + AMD_SRIOV_UCODE_ID_MEC2, + AMD_SRIOV_UCODE_ID_SOS, + AMD_SRIOV_UCODE_ID_ASD, + AMD_SRIOV_UCODE_ID_TA_RAS, + AMD_SRIOV_UCODE_ID_TA_XGMI, + AMD_SRIOV_UCODE_ID_SMC, + AMD_SRIOV_UCODE_ID_SDMA, + AMD_SRIOV_UCODE_ID_SDMA2, + AMD_SRIOV_UCODE_ID_VCN, + AMD_SRIOV_UCODE_ID_DMCU, + AMD_SRIOV_UCODE_ID__MAX +}; + +#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed + +union amd_sriov_msg_feature_flags { + struct { + uint32_t error_log_collect : 1; + uint32_t host_load_ucodes : 1; + uint32_t host_flr_vramlost : 1; + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reserved : 27; + } flags; + uint32_t all; +}; + +union amd_sriov_msg_os_info { + struct { + uint32_t windows : 1; + uint32_t reserved : 31; + } info; + uint32_t all; +}; + +struct amd_sriov_msg_pf2vf_info_header { + /* the total structure size in byte */ + uint32_t size; + /* version of this structure, written by the HOST */ + uint32_t version; + /* reserved */ + uint32_t reserved[2]; +}; + +struct amd_sriov_msg_pf2vf_info { + /* header contains size and version */ + struct amd_sriov_msg_pf2vf_info_header header; + /* use private key from mailbox 2 to create checksum */ + uint32_t checksum; + /* The features flags of the HOST driver supports */ + union amd_sriov_msg_feature_flags feature_flags; + /* (max_width * max_height * fps) / (16 * 16) */ + uint32_t hevc_enc_max_mb_per_second; + /* (max_width * max_height) / (16 * 16) */ + uint32_t hevc_enc_max_mb_per_frame; + /* (max_width * max_height * fps) / (16 * 16) */ + uint32_t avc_enc_max_mb_per_second; + /* (max_width * max_height) / (16 * 16) */ + uint32_t avc_enc_max_mb_per_frame; + /* MEC FW position in BYTE from the start of VF visible frame buffer */ + uint64_t mecfw_offset; + /* MEC FW size in BYTE */ + uint32_t mecfw_size; + /* UVD FW position in BYTE from the start of VF visible frame buffer */ + uint64_t uvdfw_offset; + /* UVD FW size in BYTE */ + uint32_t uvdfw_size; + /* VCE FW position in BYTE from the start of VF visible frame buffer */ + uint64_t vcefw_offset; + /* VCE FW size in BYTE */ + uint32_t vcefw_size; + /* Bad pages block position in BYTE */ + uint32_t bp_block_offset_low; + uint32_t bp_block_offset_high; + /* Bad pages block size in BYTE */ + uint32_t bp_block_size; + /* frequency for VF to update the VF2PF area in msec, 0 = manual */ + uint32_t vf2pf_update_interval_ms; + /* identification in ROCm SMI */ + uint64_t uuid; + uint32_t fcn_idx; + /* reserved */ + uint32_t reserved[256-26]; +}; + +struct amd_sriov_msg_vf2pf_info_header { + /* the total structure size in byte */ + uint32_t 
size; + /* version of this structure, written by the guest */ + uint32_t version; + /* reserved */ + uint32_t reserved[2]; +}; + +struct amd_sriov_msg_vf2pf_info { + /* header contains size and version */ + struct amd_sriov_msg_vf2pf_info_header header; + uint32_t checksum; + /* driver version */ + uint8_t driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + uint32_t driver_cert; + /* guest OS type and version */ + union amd_sriov_msg_os_info os_info; + /* guest fb information in the unit of MB */ + uint32_t fb_usage; + /* guest gfx engine usage percentage */ + uint32_t gfx_usage; + /* guest gfx engine health percentage */ + uint32_t gfx_health; + /* guest compute engine usage percentage */ + uint32_t compute_usage; + /* guest compute engine health percentage */ + uint32_t compute_health; + /* guest avc engine usage percentage. 0xffff means N/A */ + uint32_t avc_enc_usage; + /* guest avc engine health percentage. 0xffff means N/A */ + uint32_t avc_enc_health; + /* guest hevc engine usage percentage. 0xffff means N/A */ + uint32_t hevc_enc_usage; + /* guest hevc engine health percentage. 0xffff means N/A */ + uint32_t hevc_enc_health; + /* combined encode/decode usage */ + uint32_t encode_usage; + uint32_t decode_usage; + /* Version of PF2VF that VF understands */ + uint32_t pf2vf_version_required; + /* additional FB usage */ + uint32_t fb_vis_usage; + uint32_t fb_vis_size; + uint32_t fb_size; + /* guest ucode data, each one is 1.25 Dword */ + struct { + uint8_t id; + uint32_t version; + } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; + + /* reserved */ + uint32_t reserved[256-68]; +}; + +/* mailbox message sent from guest to host */ +enum amd_sriov_mailbox_request_message { + MB_REQ_MSG_REQ_GPU_INIT_ACCESS = 1, + MB_REQ_MSG_REL_GPU_INIT_ACCESS, + MB_REQ_MSG_REQ_GPU_FINI_ACCESS, + MB_REQ_MSG_REL_GPU_FINI_ACCESS, + MB_REQ_MSG_REQ_GPU_RESET_ACCESS, + MB_REQ_MSG_REQ_GPU_INIT_DATA, + + MB_REQ_MSG_LOG_VF_ERROR = 200, +}; + +/* mailbox message sent from host to guest */ +enum amd_sriov_mailbox_response_message { + MB_RES_MSG_CLR_MSG_BUF = 0, + MB_RES_MSG_READY_TO_ACCESS_GPU = 1, + MB_RES_MSG_FLR_NOTIFICATION, + MB_RES_MSG_FLR_NOTIFICATION_COMPLETION, + MB_RES_MSG_SUCCESS, + MB_RES_MSG_FAIL, + MB_RES_MSG_QUERY_ALIVE, + MB_RES_MSG_GPU_INIT_DATA_READY, + + MB_RES_MSG_TEXT_MESSAGE = 255 +}; + +/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */ +enum amd_sriov_gpu_init_data_version { + GPU_INIT_DATA_READY_V1 = 1, +}; + +#pragma pack(pop) // Restore previous packing option + +/* checksum function between host and guest */ +unsigned int amd_sriov_msg_checksum(void *obj, + unsigned long obj_size, + unsigned int key, + unsigned int checksum); + +/* assertion at compile time */ +#ifdef __linux__ +#define stringification(s) _stringification(s) +#define _stringification(s) #s + +_Static_assert( + sizeof(struct amd_sriov_msg_vf2pf_info) == AMD_SRIOV_MSG_SIZE_KB << 10, + "amd_sriov_msg_vf2pf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB"); + +_Static_assert( + sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10, + "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB"); + +_Static_assert( + AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, + "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); + +_Static_assert( + AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, + "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); + +#undef _stringification +#undef stringification +#endif + +#endif /* AMDGV_SRIOV_MSG__H_
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 401c99f0b2d0..db953e95f3d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -316,14 +316,9 @@ static int cik_ih_sw_fini(void *handle) static int cik_ih_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = cik_ih_irq_init(adev); - if (r) - return r; - - return 0; + return cik_ih_irq_init(adev); } static int cik_ih_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index cc93577dee03..b4d4b76538d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -47,6 +47,9 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, int index); +static int dce_virtual_pageflip(struct amdgpu_device *adev, + unsigned crtc_id); +static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vblank_timer); static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, int crtc, enum amdgpu_interrupt_state state); @@ -171,8 +174,10 @@ static void dce_virtual_crtc_commit(struct drm_crtc *crtc) static void dce_virtual_crtc_disable(struct drm_crtc *crtc) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + struct drm_device *dev = crtc->dev; - drm_crtc_vblank_off(crtc); + if (dev->num_crtcs) + drm_crtc_vblank_off(crtc); amdgpu_crtc->enabled = false; amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; @@ -247,6 +252,11 @@ static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_virtual_crtc_helper_funcs); + hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_set_expires(&amdgpu_crtc->vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD); + amdgpu_crtc->vblank_timer.function = dce_virtual_vblank_timer_handle; + hrtimer_start(&amdgpu_crtc->vblank_timer, + DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL); return 0; } @@ -476,7 +486,7 @@ static int dce_virtual_hw_fini(void *handle) for (i = 0; i<adev->mode_info.num_crtc; i++) if (adev->mode_info.crtcs[i]) - dce_virtual_set_crtc_vblank_interrupt_state(adev, i, AMDGPU_IRQ_STATE_DISABLE); + hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); return 0; } @@ -698,9 +708,15 @@ static enum hrtimer_restart dce_virtual_vblank_timer_handle(struct hrtimer *vbla struct amdgpu_crtc, vblank_timer); struct drm_device *ddev = amdgpu_crtc->base.dev; struct amdgpu_device *adev = drm_to_adev(ddev); + struct amdgpu_irq_src *source = adev->irq.client[AMDGPU_IRQ_CLIENTID_LEGACY].sources + [VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER]; + int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); - drm_handle_vblank(ddev, amdgpu_crtc->crtc_id); - dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id); + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(ddev, amdgpu_crtc->crtc_id); + dce_virtual_pageflip(adev, amdgpu_crtc->crtc_id); + } hrtimer_start(vblank_timer, DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL); @@ -716,21 +732,6 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad return; } - if (state && !adev->mode_info.crtcs[crtc]->vsync_timer_enabled) { - DRM_DEBUG("Enable software vsync timer\n"); 
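The dce_virtual rework above moves the software vblank timer out of the interrupt-state callback: the hrtimer is now armed once in dce_virtual_crtc_init, cancelled in hw_fini, and the handler merely checks amdgpu_irq_enabled() before delivering events. A minimal self-rearming hrtimer sketch of the same pattern; the names and the period are illustrative:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

#define FAKE_VBLANK_PERIOD_NS 16666666ULL	/* ~60 Hz, an assumed period */

static bool fake_vblank_enabled;	/* stands in for amdgpu_irq_enabled() */

static enum hrtimer_restart fake_vblank_handle(struct hrtimer *timer)
{
	if (fake_vblank_enabled) {
		/* deliver the event here, e.g. drm_handle_vblank() */
	}
	/* re-arm relative to now so the timer free-runs */
	hrtimer_forward_now(timer, ns_to_ktime(FAKE_VBLANK_PERIOD_NS));
	return HRTIMER_RESTART;
}

static void fake_vblank_start(struct hrtimer *timer)
{
	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	timer->function = fake_vblank_handle;
	hrtimer_start(timer, ns_to_ktime(FAKE_VBLANK_PERIOD_NS),
		      HRTIMER_MODE_REL);
}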
- hrtimer_init(&adev->mode_info.crtcs[crtc]->vblank_timer, - CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer_set_expires(&adev->mode_info.crtcs[crtc]->vblank_timer, - DCE_VIRTUAL_VBLANK_PERIOD); - adev->mode_info.crtcs[crtc]->vblank_timer.function = - dce_virtual_vblank_timer_handle; - hrtimer_start(&adev->mode_info.crtcs[crtc]->vblank_timer, - DCE_VIRTUAL_VBLANK_PERIOD, HRTIMER_MODE_REL); - } else if (!state && adev->mode_info.crtcs[crtc]->vsync_timer_enabled) { - DRM_DEBUG("Disable software vsync timer\n"); - hrtimer_cancel(&adev->mode_info.crtcs[crtc]->vblank_timer); - } - adev->mode_info.crtcs[crtc]->vsync_timer_enabled = state; DRM_DEBUG("[FM]set crtc %d vblank interrupt state %d\n", crtc, state); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 17fb2efdadd3..9792ec737029 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3610,6 +3610,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; + case CHIP_NAVY_FLOUNDER: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d898c9ff3526..6959aebae6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -49,6 +49,7 @@ #include "amdgpu_ras.h" #include "gfx_v9_4.h" +#include "gfx_v9_0.h" #include "asic_reg/pwr/pwr_10_0_offset.h" #include "asic_reg/pwr/pwr_10_0_sh_mask.h" @@ -788,7 +789,6 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, @@ -2075,6 +2075,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { .ras_error_inject = &gfx_v9_4_ras_error_inject, .query_ras_error_count = &gfx_v9_4_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, + .query_ras_error_status = &gfx_v9_4_query_ras_error_status, }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -2196,7 +2197,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { - int r; unsigned irq_type; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; unsigned int hw_prio; @@ -2221,13 +2221,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type, hw_prio); - if (r) - return r; - - - return 0; + return amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type, hw_prio); } static int gfx_v9_0_sw_init(void *handle) @@ -2402,7 +2397,8 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) /* TODO */ } -static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) +void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, + u32 instance) { u32 data; @@ -2560,14 +2556,14 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!amdgpu_noretry); + !!adev->gmc.noretry); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, - !!amdgpu_noretry); + !!adev->gmc.noretry); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 48)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index fa5a3fbaf6ab..dfe8d4841f58 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -26,9 +26,7 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; -void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); - -uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); +void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, + u32 instance); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index bd85aed3523a..bc699d680ce8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -992,3 +992,32 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if) return ret; } + +static const struct soc15_reg_entry gfx_v9_4_rdrsp_status_regs = + { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; + +void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) +{ + uint32_t i, j; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < gfx_v9_4_rdrsp_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_rdrsp_status_regs.instance; + j++) { + gfx_v9_4_select_se_sh(adev, i, 0, j); + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + gfx_v9_4_rdrsp_status_regs)); + if (reg_value) + dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n", + j, reg_value); + } + } + + gfx_v9_4_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h index 1ffecc5c0f0a..875f18473a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h @@ -34,4 +34,6 @@ int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev); +void 
gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev); + #endif /* __GFX_V9_4_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 529e46386a50..fad887a66886 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -245,7 +245,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -403,3 +403,13 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->eng_addr_distance = mmVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } + + +const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { + .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, + .gart_enable = gfxhub_v1_0_gart_enable, + .gart_disable = gfxhub_v1_0_gart_disable, + .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, + .init = gfxhub_v1_0_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 92d3a70cd9b1..0c46672bbf49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -33,4 +33,5 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); +extern const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index c0ab71df0d90..1e24b6d51e41 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -21,6 +21,7 @@ * */ #include "amdgpu.h" +#include "gfxhub_v1_0.h" #include "gfxhub_v1_1.h" #include "gc/gc_9_2_1_offset.h" @@ -28,7 +29,7 @@ #include "soc15_common.h" -int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) +static int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) { u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = @@ -66,3 +67,13 @@ int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) return 0; } + +const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs = { + .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, + .gart_enable = gfxhub_v1_0_gart_enable, + .gart_disable = gfxhub_v1_0_gart_disable, + .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, + .init = gfxhub_v1_0_init, + .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h index d753cf28a0a6..ae5759ffbee3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h @@ -24,6 +24,6 @@ #ifndef __GFXHUB_V1_1_H__ #define __GFXHUB_V1_1_H__ -int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev); +extern const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b882ac59879a..456360bf58fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -102,7 +102,7 @@ 
gfxhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, GCVM_L2_PROTECTION_FAULT_STATUS, RW)); } -u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev) +static u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE); @@ -112,12 +112,12 @@ u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev) return base; } -u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) +static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) { return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; } -void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, +static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -153,11 +153,6 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; if (!amdgpu_sriov_vf(adev)) { - /* - * the new L1 policy will block SRIOV guest from writing - * these regs, and they will be programed at host. - * so skip programing these regs. - */ /* Disable AGP. */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); @@ -318,7 +313,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -347,7 +342,7 @@ static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) } } -int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) { /* GART Enable. 
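These gfxhub conversions all follow one shape: the per-generation entry points become static, and each file exports a single const ops table (struct amdgpu_gfxhub_funcs) that callers reach through adev->gfxhub.funcs instead of asic_type checks. A compact userspace sketch of the pattern, with invented names to keep it self-contained:

#include <stdio.h>

struct hub_funcs {
	unsigned long long (*get_fb_location)(void);
	int (*gart_enable)(void);
};

static unsigned long long hub_v1_get_fb_location(void)
{
	return 0x100000000ULL;	/* placeholder framebuffer base */
}

static int hub_v1_gart_enable(void)
{
	puts("v1 GART enabled");
	return 0;
}

/* One vtable per hardware generation; callers only see the table. */
static const struct hub_funcs hub_v1_funcs = {
	.get_fb_location = hub_v1_get_fb_location,
	.gart_enable = hub_v1_gart_enable,
};

int main(void)
{
	const struct hub_funcs *funcs = &hub_v1_funcs;	/* chosen at early init */

	printf("fb at 0x%llx\n", funcs->get_fb_location());
	return funcs->gart_enable();
}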
*/ gfxhub_v2_0_init_gart_aperture_regs(adev); @@ -363,7 +358,7 @@ int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) return 0; } -void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) +static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; @@ -394,7 +389,7 @@ void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page */ -void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, +static void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; @@ -436,7 +431,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = { .get_invalidate_req = gfxhub_v2_0_get_invalidate_req, }; -void gfxhub_v2_0_init(struct amdgpu_device *adev) +static void gfxhub_v2_0_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -477,3 +472,13 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs; } + +const struct amdgpu_gfxhub_funcs gfxhub_v2_0_funcs = { + .get_fb_location = gfxhub_v2_0_get_fb_location, + .get_mc_fb_offset = gfxhub_v2_0_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v2_0_setup_vm_pt_regs, + .gart_enable = gfxhub_v2_0_gart_enable, + .gart_disable = gfxhub_v2_0_gart_disable, + .set_fault_enable_default = gfxhub_v2_0_set_fault_enable_default, + .init = gfxhub_v2_0_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h index 392b8cd94fc0..9ddc35cd53d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -24,14 +24,6 @@ #ifndef __GFXHUB_V2_0_H__ #define __GFXHUB_V2_0_H__ -u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev); -int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev); -void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev); -void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, - bool value); -void gfxhub_v2_0_init(struct amdgpu_device *adev); -u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); +extern const struct amdgpu_gfxhub_funcs gfxhub_v2_0_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 237a9ff5afa0..724bb29e9bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -102,7 +102,7 @@ gfxhub_v2_1_print_l2_protection_fault_status(struct amdgpu_device *adev, GCVM_L2_PROTECTION_FAULT_STATUS, RW)); } -u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev) +static u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE); @@ -112,12 +112,12 @@ u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev) return base; } -u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) +static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) { return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; } -void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, +static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -319,7 +319,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) /* Send 
no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -348,7 +348,7 @@ static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) } } -int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { /* @@ -376,7 +376,7 @@ int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) return 0; } -void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) +static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; @@ -405,7 +405,7 @@ void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page */ -void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, +static void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; @@ -454,7 +454,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = { .get_invalidate_req = gfxhub_v2_1_get_invalidate_req, }; -void gfxhub_v2_1_init(struct amdgpu_device *adev) +static void gfxhub_v2_1_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -496,7 +496,7 @@ void gfxhub_v2_1_init(struct amdgpu_device *adev) hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs; } -int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) +static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) { u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL); u32 max_region = @@ -531,3 +531,14 @@ int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) return 0; } + +const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { + .get_fb_location = gfxhub_v2_1_get_fb_location, + .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v2_1_setup_vm_pt_regs, + .gart_enable = gfxhub_v2_1_gart_enable, + .gart_disable = gfxhub_v2_1_gart_disable, + .set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default, + .init = gfxhub_v2_1_init, + .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h index 3452a4e9a3da..f75c2eccfad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.h @@ -24,16 +24,6 @@ #ifndef __GFXHUB_V2_1_H__ #define __GFXHUB_V2_1_H__ -u64 gfxhub_v2_1_get_fb_location(struct amdgpu_device *adev); -int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev); -void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev); -void gfxhub_v2_1_set_fault_enable_default(struct amdgpu_device *adev, - bool value); -void gfxhub_v2_1_init(struct amdgpu_device *adev); -u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); - -int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev); +extern const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 31359e519d69..dbc8b76b9b78 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -634,11 +634,26 @@ static 
void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) adev->mmhub.funcs = &mmhub_v2_0_funcs; } +static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: + adev->gfxhub.funcs = &gfxhub_v2_1_funcs; + break; + default: + adev->gfxhub.funcs = &gfxhub_v2_0_funcs; + break; + } +} + + static int gmc_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v10_0_set_mmhub_funcs(adev); + gmc_v10_0_set_gfxhub_funcs(adev); gmc_v10_0_set_gmc_funcs(adev); gmc_v10_0_set_irq_funcs(adev); gmc_v10_0_set_umc_funcs(adev); @@ -676,11 +691,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = 0; - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - base = gfxhub_v2_1_get_fb_location(adev); - else - base = gfxhub_v2_0_get_fb_location(adev); + base = adev->gfxhub.funcs->get_fb_location(adev); /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; @@ -689,11 +700,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_gart_location(adev, mc); /* base offset of vram pages */ - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - adev->vm_manager.vram_base_offset = gfxhub_v2_1_get_mc_fb_offset(adev); - else - adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); + adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); /* add the xgmi offset of the physical node */ adev->vm_manager.vram_base_offset += @@ -777,11 +784,7 @@ static int gmc_v10_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - gfxhub_v2_1_init(adev); - else - gfxhub_v2_0_init(adev); + adev->gfxhub.funcs->init(adev); adev->mmhub.funcs->init(adev); @@ -852,7 +855,7 @@ static int gmc_v10_0_sw_init(void *handle) } if (adev->gmc.xgmi.supported) { - r = gfxhub_v2_1_get_xgmi_info(adev); + r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) return r; } @@ -944,11 +947,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - r = gfxhub_v2_1_gart_enable(adev); - else - r = gfxhub_v2_0_gart_enable(adev); + r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -969,11 +968,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - gfxhub_v2_1_set_fault_enable_default(adev, value); - else - gfxhub_v2_0_set_fault_enable_default(adev, value); + adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); @@ -1014,11 +1009,7 @@ static int gmc_v10_0_hw_init(void *handle) */ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) - gfxhub_v2_1_gart_disable(adev); - else - gfxhub_v2_0_gart_disable(adev); + adev->gfxhub.funcs->gart_disable(adev); adev->mmhub.funcs->gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5400cac02087..3ebbddb63705 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1164,6 +1164,19 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) } } +static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_ARCTURUS: + case CHIP_VEGA20: + adev->gfxhub.funcs = &gfxhub_v1_1_funcs; + break; + default: + adev->gfxhub.funcs = &gfxhub_v1_0_funcs; + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1172,6 +1185,7 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_irq_funcs(adev); gmc_v9_0_set_umc_funcs(adev); gmc_v9_0_set_mmhub_funcs(adev); + gmc_v9_0_set_gfxhub_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -1193,21 +1207,16 @@ static int gmc_v9_0_late_init(void *handle) r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; - /* Check if ecc is available */ + + /* + * Work around a performance drop issue seen when the VBIOS enables + * partial writes but disables HBM ECC for vega10. + */ if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { - r = amdgpu_atomfirmware_mem_ecc_supported(adev); - if (!r) { - DRM_INFO("ECC is not present.\n"); + if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) { if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); - } else - DRM_INFO("ECC is active.\n"); - - r = amdgpu_atomfirmware_sram_ecc_supported(adev); - if (!r) - DRM_INFO("SRAM ECC is not present.\n"); - else - DRM_INFO("SRAM ECC is active.\n"); + } } if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count) @@ -1234,7 +1243,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_gart_location(adev, mc); amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ - adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); + adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); /* XXX: add the xgmi offset of the physical node?
*/ adev->vm_manager.vram_base_offset += @@ -1269,7 +1278,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) #ifdef CONFIG_X86_64 if (adev->flags & AMD_IS_APU) { - adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev); + adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } #endif @@ -1339,7 +1348,7 @@ static int gmc_v9_0_sw_init(void *handle) int r, vram_width = 0, vram_type = 0, vram_vendor = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfxhub_v1_0_init(adev); + adev->gfxhub.funcs->init(adev); adev->mmhub.funcs->init(adev); @@ -1453,7 +1462,7 @@ static int gmc_v9_0_sw_init(void *handle) adev->need_swiotlb = drm_need_swiotlb(44); if (adev->gmc.xgmi.supported) { - r = gfxhub_v1_1_get_xgmi_info(adev); + r = adev->gfxhub.funcs->get_xgmi_info(adev); if (r) return r; } @@ -1569,7 +1578,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - r = gfxhub_v1_0_gart_enable(adev); + r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1636,7 +1645,7 @@ static int gmc_v9_0_hw_init(void *handle) value = true; if (!amdgpu_sriov_vf(adev)) { - gfxhub_v1_0_set_fault_enable_default(adev, value); + adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); } for (i = 0; i < adev->num_vmhubs; ++i) @@ -1659,7 +1668,7 @@ static int gmc_v9_0_hw_init(void *handle) */ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { - gfxhub_v1_0_gart_disable(adev); + adev->gfxhub.funcs->gart_disable(adev); adev->mmhub.funcs->gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1683,14 +1692,9 @@ static int gmc_v9_0_hw_fini(void *handle) static int gmc_v9_0_suspend(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = gmc_v9_0_hw_fini(adev); - if (r) - return r; - - return 0; + return gmc_v9_0_hw_fini(adev); } static int gmc_v9_0_resume(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 4b746584a797..1c22d8393b21 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -832,7 +832,6 @@ static int mes_v10_1_queue_init(struct amdgpu_device *adev) static int mes_v10_1_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int r; ring = &adev->mes.ring; @@ -849,11 +848,7 @@ static int mes_v10_1_ring_init(struct amdgpu_device *adev) ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT); - if (r) - return r; - - return 0; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT); } static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 45a902b1acb7..f84701c562bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -268,7 +268,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. 
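The recurring change in these hunks swaps the global amdgpu_noretry module parameter for the per-device adev->gmc.noretry flag inside a REG_SET_FIELD() update. REG_SET_FIELD expands to a mask-and-shift read-modify-write against generated <REG>__<FIELD>__SHIFT/_MASK constants; a generic sketch with invented constants:

#include <stdint.h>

#define CTX1_CNTL_RETRY_FAULT__SHIFT 7		/* illustrative, not the real bit */
#define CTX1_CNTL_RETRY_FAULT_MASK   (1u << 7)

/* Clear the field, then OR in the new value - roughly what
 * REG_SET_FIELD(tmp, ..., RETRY_..., !noretry) expands to. */
static inline uint32_t set_retry_fault(uint32_t tmp, int noretry)
{
	tmp &= ~CTX1_CNTL_RETRY_FAULT_MASK;
	tmp |= ((uint32_t)!noretry << CTX1_CNTL_RETRY_FAULT__SHIFT) &
	       CTX1_CNTL_RETRY_FAULT_MASK;
	return tmp;
}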
*/ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 2d88278c50bf..2063700f0bc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -201,11 +201,6 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); if (!amdgpu_sriov_vf(adev)) { - /* - * the new L1 policy will block SRIOV guest from writing - * these regs, and they will be programed at host. - * so skip programing these regs. - */ /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->gmc.vram_start >> 18); @@ -374,7 +369,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 6c6ad529c65c..66748bb01b52 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -330,7 +330,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i * hub->ctx_distance, tmp); @@ -1624,6 +1624,34 @@ static void mmhub_v9_4_reset_ras_error_count(struct amdgpu_device *adev) } } +static const struct soc15_reg_entry mmhub_v9_4_err_status_regs[] = { + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA0_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA1_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA2_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA3_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA4_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA5_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA6_ERR_STATUS), 0, 0, 0 }, + { SOC15_REG_ENTRY(MMHUB, 0, mmMMEA7_ERR_STATUS), 0, 0, 0 }, +}; + +static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev) +{ + int i; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) + return; + + for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_err_status_regs); i++) { + reg_value = + RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_err_status_regs[i])); + if (reg_value) + dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n", + i, reg_value); + } +} + const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { .ras_late_init = amdgpu_mmhub_ras_late_init, .query_ras_error_count = mmhub_v9_4_query_ras_error_count, @@ -1636,4 +1664,5 @@ const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { .set_clockgating = mmhub_v9_4_set_clockgating, .get_clockgating = mmhub_v9_4_get_clockgating, .setup_vm_pt_regs = mmhub_v9_4_setup_vm_pt_regs, + .query_ras_error_status = mmhub_v9_4_query_ras_error_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 0ec66030bd11..1ce741a0c6a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -69,75 +69,40 @@ static const struct amd_ip_funcs nv_common_ip_funcs; */ static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) { - unsigned long flags, address, data; - u32 r; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg); - (void)RREG32(address); - r = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; + return amdgpu_device_indirect_rreg(adev, address, data, reg); } static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { - unsigned long flags, address, data; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg); - (void)RREG32(address); - WREG32(data, v); - (void)RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + amdgpu_device_indirect_wreg(adev, address, data, reg, v); } static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) { - unsigned long flags, address, data; - u64 r; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - /* read low 32 bit */ - WREG32(address, reg); - (void)RREG32(address); - r = RREG32(data); - - /* read high 32 bit*/ - WREG32(address, 
reg + 4); - (void)RREG32(address); - r |= ((u64)RREG32(data) << 32); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; + return amdgpu_device_indirect_rreg64(adev, address, data, reg); } static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { - unsigned long flags, address, data; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - /* write low 32 bit */ - WREG32(address, reg); - (void)RREG32(address); - WREG32(data, (u32)(v & 0xffffffffULL)); - (void)RREG32(data); - - /* write high 32 bit */ - WREG32(address, reg + 4); - (void)RREG32(address); - WREG32(data, (u32)(v >> 32)); - (void)RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + amdgpu_device_indirect_wreg64(adev, address, data, reg, v); } static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) @@ -621,7 +586,7 @@ static void nv_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) { - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); } else { amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index cbc04a5c0fe1..1ef2f5b1d828 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -83,19 +83,6 @@ struct psp_gfx_ctrl */ #define GFX_FLAG_RESPONSE 0x80000000 -/* Gbr IH registers ID */ -enum ih_reg_id { - IH_RB = 0, // IH_RB_CNTL - IH_RB_RNG1 = 1, // IH_RB_CNTL_RING1 - IH_RB_RNG2 = 2, // IH_RB_CNTL_RING2 -}; - -/* Command to setup Gibraltar IH register */ -struct psp_gfx_cmd_gbr_ih_reg { - uint32_t reg_value; /* Value to be set to the IH_RB_CNTL... register*/ - enum ih_reg_id reg_id; /* ID of the register */ -}; - /* TEE Gfx Command IDs for the ring buffer interface. 
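The nv.c conversion above (and the matching soc15.c one below) collapses four open-coded index/data sequences into the shared amdgpu_device_indirect_* helpers added earlier in this series. A hedged sketch of what such a helper does, mirroring the removed sequences rather than the final implementation and reusing the driver's WREG32/RREG32 accessors: program the index register, issue a posting read, then touch the data register, all under the PCIE index lock.

static u32 indirect_rreg(struct amdgpu_device *adev,
			 u32 pcie_index, u32 pcie_data, u32 reg_addr)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(pcie_index, reg_addr);
	(void)RREG32(pcie_index);	/* posting read flushes the index write */
	r = RREG32(pcie_data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}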
*/ enum psp_gfx_cmd_id { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 810635cbf4c1..86fb1eddf5a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -592,6 +592,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; + if (amdgpu_sriov_vf(adev)) + return 0; + DRM_DEBUG("\n"); switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 48c95a78a173..9c72b95b7463 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -203,6 +203,9 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) const struct common_firmware_header *header = NULL; const struct sdma_firmware_header_v1_0 *hdr; + if (amdgpu_sriov_vf(adev)) + return 0; + DRM_DEBUG("\n"); switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 34ccf376ee45..9f3952723c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -148,6 +148,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; + if (amdgpu_sriov_vf(adev)) + return 0; + DRM_DEBUG("\n"); switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ddd55e3176c4..afcccc6c0fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -101,75 +101,40 @@ */ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { - unsigned long flags, address, data; - u32 r; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg); - (void)RREG32(address); - r = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; + return amdgpu_device_indirect_rreg(adev, address, data, reg); } static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { - unsigned long flags, address, data; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg); - (void)RREG32(address); - WREG32(data, v); - (void)RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + amdgpu_device_indirect_wreg(adev, address, data, reg, v); } static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) { - unsigned long flags, address, data; - u64 r; + unsigned long address, data; address = adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - /* read low 32 bit */ - WREG32(address, reg); - (void)RREG32(address); - r = RREG32(data); - - /* read high 32 bit*/ - WREG32(address, reg + 4); - (void)RREG32(address); - r |= ((u64)RREG32(data) << 32); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; + return amdgpu_device_indirect_rreg64(adev, address, data, reg); } static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { - unsigned long flags, address, data; + unsigned long address, data; address = 
adev->nbio.funcs->get_pcie_index_offset(adev); data = adev->nbio.funcs->get_pcie_data_offset(adev); - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - /* write low 32 bit */ - WREG32(address, reg); - (void)RREG32(address); - WREG32(data, (u32)(v & 0xffffffffULL)); - (void)RREG32(data); - - /* write high 32 bit */ - WREG32(address, reg + 4); - (void)RREG32(address); - WREG32(data, (u32)(v >> 32)); - (void)RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + amdgpu_device_indirect_wreg64(adev, address, data, reg, v); } static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) @@ -697,12 +662,12 @@ static void soc15_reg_base_init(struct amdgpu_device *adev) * it doesn't support SRIOV. */ if (amdgpu_discovery) { r = amdgpu_discovery_reg_base_init(adev); - if (r) { - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - vega10_reg_base_init(adev); - } + if (r == 0) + break; + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); } + vega10_reg_base_init(adev); break; case CHIP_VEGA20: vega20_reg_base_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 3cafba726587..b0c0c438fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -348,7 +348,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) /* Set the write pointer delay */ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0); - /* programm the 4GB memory segment for rptr and ring buffer */ + /* program the 4GB memory segment for rptr and ring buffer */ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | (0x7 << 16) | (0x1 << 31)); @@ -541,7 +541,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; - /* programm the VCPU memory controller bits 0-27 */ + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index a566ff926e90..6e57001f6d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -253,7 +253,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) uint64_t offset; uint32_t size; - /* programm memory controller bits 0-27 */ + /* program memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->uvd.inst->gpu_addr)); WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, @@ -404,7 +404,7 @@ static int uvd_v5_0_start(struct amdgpu_device *adev) /* set the wb address */ WREG32(mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index ed30fb48b9db..666bfa4a0b8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -583,7 +583,7 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) uint64_t offset; uint32_t size; - /* programm memory controller bits 0-27 */ + /* program memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->uvd.inst->gpu_addr)); 
WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, @@ -825,7 +825,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) /* set the wb address */ WREG32(mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e07e3fae99b5..b44c8677ce8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1073,7 +1073,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 927c330fad21..73699eafb51e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -910,7 +910,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, @@ -1068,7 +1068,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 23a9eb5b2c8a..e5d29dee0c88 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -900,7 +900,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, @@ -1060,7 +1060,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 139fac0d8e76..0f1d3ef8baa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -882,7 +882,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); - /* programm the RB_BASE for ring buffer */ + /* program the 
RB_BASE for ring buffer */ WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, @@ -1062,7 +1062,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; - /* programm the RB_BASE for ring buffer */ + /* program the RB_BASE for ring buffer */ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b7b16adb0615..222f1df1a6b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -97,6 +97,7 @@ void kfd_chardev_exit(void) device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0)); class_destroy(kfd_class); unregister_chrdev(kfd_char_dev_major, kfd_dev_name); + kfd_device = NULL; } struct device *kfd_chardev(void) @@ -1290,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return -EINVAL; } - if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { - if (args->size != kfd_doorbell_process_slice(dev)) - return -EINVAL; - offset = kfd_get_process_doorbells(dev, p); - } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { - if (args->size != PAGE_SIZE) - return -EINVAL; - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); - if (!offset) - return -ENOMEM; - } - mutex_lock(&p->mutex); pdd = kfd_bind_process_to_device(dev, p); @@ -1310,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { + if (args->size != kfd_doorbell_process_slice(dev)) { + err = -EINVAL; + goto err_unlock; + } + offset = kfd_get_process_doorbells(pdd); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) { + err = -EINVAL; + goto err_unlock; + } + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) { + err = -ENOMEM; + goto err_unlock; + } + } + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->kgd, args->va_addr, args->size, pdd->vm, (struct kgd_mem **) &mem, &offset, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 3fac06b281ce..d2981524dba0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -797,7 +797,8 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) return -ENODATA; } - pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL); + pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; + memcpy(pcrat_image, crat_table, crat_table->length); @@ -809,11 +810,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* Memory required to create Virtual CRAT. * Since there is no easy way to predict the amount of memory required, the - * following amount are allocated for CPU and GPU Virtual CRAT. This is + * following amount is allocated for GPU Virtual CRAT. This is * expected to cover all known conditions. But to be safe additional check * is put in the code to ensure we don't overwrite.
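 *
 * For illustration, with the dynamic CPU VCRAT sizing introduced below and a
 * hypothetical system of two online NUMA nodes, the allocation works out to
 * sizeof(struct crat_header) + 2 * (sizeof(struct crat_subtype_computeunit) +
 * sizeof(struct crat_subtype_memory) + 1 * sizeof(struct crat_subtype_iolink)):
 * one CU subtype and one memory subtype per node, plus an iolink from each
 * node to the other.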
*/ -#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) #define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node @@ -964,7 +964,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) #endif int ret = 0; - if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) + if (!pcrat_image) return -EINVAL; /* Fill in CRAT Header. @@ -1364,30 +1364,37 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, uint32_t proximity_domain) { void *pcrat_image = NULL; - int ret = 0; + int ret = 0, num_nodes; + size_t dyn_size; if (!crat_image) return -EINVAL; *crat_image = NULL; - /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and - * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover - * all the current conditions. A check is put not to overwrite beyond - * allocated size + /* Allocate the CPU Virtual CRAT size based on the number of online + * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. + * This should cover all the current conditions. A check is in place + * so we do not overwrite beyond the allocated size for GPUs */ switch (flags) { case COMPUTE_UNIT_CPU: - pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); + num_nodes = num_online_nodes(); + dyn_size = sizeof(struct crat_header) + + num_nodes * (sizeof(struct crat_subtype_computeunit) + + sizeof(struct crat_subtype_memory) + + (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); + pcrat_image = kvmalloc(dyn_size, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; - *size = VCRAT_SIZE_FOR_CPU; + *size = dyn_size; + pr_debug("CRAT size is %zd\n", dyn_size); ret = kfd_create_vcrat_image_cpu(pcrat_image, size); break; case COMPUTE_UNIT_GPU: if (!kdev) return -EINVAL; - pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); + pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; *size = VCRAT_SIZE_FOR_GPU; @@ -1406,7 +1413,7 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, if (!ret) *crat_image = pcrat_image; else - kfree(pcrat_image); + kvfree(pcrat_image); return ret; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e3fc6ed7b79c..903170e59342 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, atomic_set(&kfd->sram_ecc_flag, 0); + ida_init(&kfd->doorbell_ida); + return kfd; } @@ -716,6 +718,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd); + kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); + if (kfd_interrupt_init(kfd)) { dev_err(kfd_device, "Error initializing interrupts\n"); goto kfd_interrupt_error; @@ -798,6 +802,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); + ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); if (kfd->gws) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ed362ab8ec21..62504d5fa42b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) } q->properties.doorbell_off = - kfd_get_doorbell_dw_offset_in_bar(dev, q->process, +
kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), q->doorbell_id); - return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 309f63a0b34a..eca6331efa94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -61,7 +61,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (amdgpu_noretry && + if (dqm->dev->noretry && !dqm->dev->use_iommu_v2) qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 8e0c00b9555e..768d153acff4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -31,9 +31,6 @@ * kernel queues using the first doorbell page reserved for the kernel. */ -static DEFINE_IDA(doorbell_ida); -static unsigned int max_doorbell_slices; - /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that * receives 32-bit writes that are passed to queues as wptr values. @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd) else return -ENOSPC; - if (!max_doorbell_slices || - doorbell_process_limit < max_doorbell_slices) - max_doorbell_slices = doorbell_process_limit; + if (!kfd->max_doorbell_slices || + doorbell_process_limit < kfd->max_doorbell_slices) + kfd->max_doorbell_slices = doorbell_process_limit; kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { phys_addr_t address; + struct kfd_process_device *pdd; /* * For simplicity we only allow mapping of the entire doorbell @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; - /* Calculate physical address of doorbell */ - address = kfd_get_process_doorbells(dev, process); + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + /* Calculate physical address of doorbell */ + address = kfd_get_process_doorbells(pdd); vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) } unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, - struct kfd_process *process, + struct kfd_process_device *pdd, unsigned int doorbell_id) { /* @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, * units regardless of the ASIC-dependent doorbell size.
*/ return kfd->doorbell_base_dw_offset + - process->doorbell_index + pdd->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } @@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd) } -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, - struct kfd_process *process) +phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) { - return dev->doorbell_base + - process->doorbell_index * kfd_doorbell_process_slice(dev); + return pdd->dev->doorbell_base + + pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); } -int kfd_alloc_process_doorbells(struct kfd_process *process) +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) { - int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices, + int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices, GFP_KERNEL); if (r > 0) - process->doorbell_index = r; + *doorbell_index = r; return r; } -void kfd_free_process_doorbells(struct kfd_process *process) +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) { - if (process->doorbell_index) - ida_simple_remove(&doorbell_ida, process->doorbell_index); + if (doorbell_index) + ida_simple_remove(&kfd->doorbell_ida, doorbell_index); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f4b7f7e6c40e..5e90fe642192 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -70,6 +70,7 @@ err_create_wq: err_topology: kfd_chardev_exit(); err_ioctl: + pr_err("KFD is disabled due to module initialization failure\n"); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8c2b8ccd27fb..b7be5c5751b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -314,6 +314,11 @@ struct kfd_dev { spinlock_t smi_lock; uint32_t reset_seq_num; + + struct ida doorbell_ida; + unsigned int max_doorbell_slices; + + int noretry; }; enum kfd_mempool { @@ -699,6 +704,32 @@ struct kfd_process_device { struct attribute attr_evict; struct kobject *kobj_stats; + unsigned int doorbell_index; + + /* + * @cu_occupancy: Reports occupancy of Compute Units (CUs) of a process + * that is associated with the device encoded by "this" struct instance. The + * value reflects CU usage by all of the waves launched by this process + * on this device. A very important property of the occupancy parameter is + * that its value is a snapshot of current use. + * + * The following is to be noted regarding how this parameter is reported: + * + * The number of waves that a CU can launch is limited by a couple of + * parameters. These are encoded by the struct amdgpu_cu_info instance + * that is part of every device definition. For GFX9 devices this + * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves + * do not use scratch memory and 32 waves (max_scratch_slots_per_cu) + * when they do use scratch memory. This could change for future + * devices and therefore this example should be considered as a guide. + * + * All CUs of a device are available to the process. This may not be true + * under certain conditions - e.g. CU masking.
+ * + * Finally, the number of CUs occupied by a process is affected by both the + * number of CUs the device has and the number of other competing processes + */ + struct attribute attr_cu_occupancy; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -736,7 +767,6 @@ struct kfd_process { struct mmu_notifier mmu_notifier; uint16_t pasid; - unsigned int doorbell_index; /* * List of kfd_process_device structures, @@ -869,13 +899,13 @@ u32 read_kernel_doorbell(u32 __iomem *db); void write_kernel_doorbell(void __iomem *db, u32 value); void write_kernel_doorbell64(void __iomem *db, u64 value); unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, - struct kfd_process *process, + struct kfd_process_device *pdd, unsigned int doorbell_id); -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, - struct kfd_process *process); -int kfd_alloc_process_doorbells(struct kfd_process *process); -void kfd_free_process_doorbells(struct kfd_process *process); - +phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd); +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, + unsigned int *doorbell_index); +void kfd_free_process_doorbells(struct kfd_dev *kfd, + unsigned int doorbell_index); /* GTT Sub-Allocator */ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ad53b2668221..2807e1c4d59b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -249,6 +249,52 @@ cleanup: } } +/** + * kfd_get_cu_occupancy - Collect number of waves in flight on this device + * by the current process. Translates the acquired wave count into the number + * of compute units that are occupied. + * + * @attr: Handle of attribute that allows reporting of wave count.
The attribute + * handle encapsulates the GPU device it is associated with, thereby allowing collection + * of waves in flight, etc. + * + * @buffer: Handle of the user-provided buffer updated with the wave count + * + * Return: Number of bytes written to user buffer or an error value + */ +static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) +{ + int cu_cnt; + int wave_cnt; + int max_waves_per_cu; + struct kfd_dev *dev = NULL; + struct kfd_process *proc = NULL; + struct kfd_process_device *pdd = NULL; + + pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); + dev = pdd->dev; + if (dev->kfd2kgd->get_cu_occupancy == NULL) + return -EINVAL; + + cu_cnt = 0; + proc = pdd->process; + if (pdd->qpd.queue_count == 0) { + pr_debug("Gpu-Id: %d has no active queues for process %d\n", + dev->id, proc->pasid); + return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); + } + + /* Collect wave count from device if it supports it */ + wave_cnt = 0; + max_waves_per_cu = 0; + dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt, + &max_waves_per_cu); + + /* Translate wave count to number of compute units */ + cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; + return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); +} + static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -344,6 +390,7 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj, return 0; } + static ssize_t kfd_procfs_stats_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -359,8 +406,13 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj, PAGE_SIZE, "%llu\n", jiffies64_to_msecs(evict_jiffies)); - } else + + /* Sysfs handle that gets CU occupancy is per device */ + } else if (strcmp(attr->name, "cu_occupancy") == 0) { + return kfd_get_cu_occupancy(attr, buffer); + } else { pr_err("Invalid attribute"); + } return 0; } @@ -466,6 +518,7 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) * Create sysfs files for each GPU: * - proc/<pid>/stats_<gpuid>/ * - proc/<pid>/stats_<gpuid>/evicted_ms + * - proc/<pid>/stats_<gpuid>/cu_occupancy */ list_for_each_entry(pdd, &p->per_device_data, per_device_list) { struct kobject *kobj_stats; @@ -496,6 +549,19 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) if (ret) pr_warn("Creating eviction stats for gpuid %d failed", (int)pdd->dev->id); + + /* Add sysfs file to report compute unit occupancy */ + if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) { + pdd->attr_cu_occupancy.name = "cu_occupancy"; + pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&pdd->attr_cu_occupancy); + ret = sysfs_create_file(kobj_stats, + &pdd->attr_cu_occupancy); + if (ret) + pr_warn("Creating %s failed for gpuid: %d", + pdd->attr_cu_occupancy.name, + (int)pdd->dev->id); + } } err: return ret; @@ -537,7 +603,6 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p) return ret; } - void kfd_procfs_del_queue(struct queue *q) { if (!q) @@ -750,11 +815,6 @@ struct kfd_process *kfd_create_process(struct file *filep) pr_warn("Creating sysfs stats dir for pid %d failed", (int)process->lead_thread->pid); - ret = kfd_procfs_add_sysfs_stats(process); - if (ret) - pr_warn("Creating sysfs stats dir for pid %d failed", - (int)process->lead_thread->pid); - ret = kfd_procfs_add_sysfs_files(process); if (ret) pr_warn("Creating sysfs usage file for pid %d failed", @@ -876,6 +936,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) kfree(pdd->qpd.doorbell_bitmap);
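/*
 * The wave-count-to-CU translation in kfd_get_cu_occupancy() above is a
 * ceiling division. A self-contained sketch with hypothetical numbers (the
 * 40 waves/CU figure is the GFX9 value quoted in the cu_occupancy comment;
 * the wave counts are made up):
 */
static int waves_to_cus(int wave_cnt, int max_waves_per_cu)
{
	/* round up: a partially used CU still counts as occupied */
	return (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
}
/* waves_to_cus(85, 40) == 3, waves_to_cus(80, 40) == 2, waves_to_cus(0, 40) == 0 */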
idr_destroy(&pdd->alloc_idr); + kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); + /* * before destroying pdd, make sure to report availability * for auto suspend @@ -912,6 +974,8 @@ static void kfd_process_wq_release(struct work_struct *work) sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); sysfs_remove_file(p->kobj, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) + sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy); kobject_del(pdd->kobj_stats); kobject_put(pdd->kobj_stats); pdd->kobj_stats = NULL; @@ -932,8 +996,6 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_event_free_process(p); kfd_pasid_free(p->pasid); - kfd_free_process_doorbells(p); - mutex_destroy(&p->mutex); put_task_struct(p->lead_thread); @@ -1111,9 +1173,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (process->pasid == 0) goto err_alloc_pasid; - if (kfd_alloc_process_doorbells(process) < 0) - goto err_alloc_doorbells; - err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; @@ -1141,8 +1200,6 @@ err_register_notifier: err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: - kfd_free_process_doorbells(process); -err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: mutex_destroy(&process->mutex); @@ -1205,10 +1262,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { + pr_err("Failed to alloc doorbell for pdd\n"); + goto err_free_pdd; + } + if (init_doorbell_bitmap(&pdd->qpd, dev)) { pr_err("Failed to init doorbell for process\n"); - kfree(pdd); - return NULL; + goto err_free_pdd; } pdd->dev = dev; @@ -1231,6 +1292,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, idr_init(&pdd->alloc_idr); return pdd; + +err_free_pdd: + kfree(pdd); + return NULL; } /** diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 004cd8d38214..8cd646eef096 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -908,7 +908,7 @@ static ssize_t dp_dpcd_address_write(struct file *f, const char __user *buf, struct amdgpu_dm_connector *connector = file_inode(f)->i_private; if (size < sizeof(connector->debugfs_dpcd_address)) - return 0; + return -EINVAL; r = copy_from_user(&connector->debugfs_dpcd_address, buf, sizeof(connector->debugfs_dpcd_address)); @@ -923,7 +923,7 @@ static ssize_t dp_dpcd_size_write(struct file *f, const char __user *buf, struct amdgpu_dm_connector *connector = file_inode(f)->i_private; if (size < sizeof(connector->debugfs_dpcd_size)) - return 0; + return -EINVAL; r = copy_from_user(&connector->debugfs_dpcd_size, buf, sizeof(connector->debugfs_dpcd_size)); @@ -943,8 +943,8 @@ static ssize_t dp_dpcd_data_write(struct file *f, const char __user *buf, struct dc_link *link = connector->dc_link; uint32_t write_size = connector->debugfs_dpcd_size; - if (size < write_size) - return 0; + if (!write_size || size < write_size) + return -EINVAL; data = kzalloc(write_size, GFP_KERNEL); if (!data) @@ -967,7 +967,7 @@ static ssize_t dp_dpcd_data_read(struct file *f, char __user *buf, struct dc_link *link = connector->dc_link; uint32_t read_size = connector->debugfs_dpcd_size; - if (size < read_size) + if (!read_size || size < read_size) return 
0; data = kzalloc(read_size, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 694c5bc93665..c2cd184f0bbd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -604,7 +604,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct int i = 0; hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL); - if (hdcp_work == NULL) + if (ZERO_OR_NULL_PTR(hdcp_work)) return NULL; hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9d7333a36fac..db741e47d194 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -159,7 +159,20 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 dsc_caps[16] = { 0 }; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); +#if defined(CONFIG_HP_HOOK_WORKAROUND) + /* + * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs + * because it only checks the dsc/fec caps of the "port variable" and not the dock + * + * This case will return NULL: DSC capable MST dock connected to a non fec/dsc capable display + * + * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux + * + */ + if (!aconnector->dsc_aux && !port->parent->port_parent) + aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; +#endif if (!aconnector->dsc_aux) return false; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c index d031bd3d3072..807dca8f7d7a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c @@ -79,8 +79,7 @@ int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz) memset(&dce_clk_params, 0, sizeof(dce_clk_params)); /* Make sure requested clock isn't lower than minimum threshold*/ - if (requested_clk_khz > 0) - requested_clk_khz = max(requested_clk_khz, + requested_clk_khz = max(requested_clk_khz, clk_mgr_dce->base.dentist_vco_freq_khz / 62); dce_clk_params.target_clock_frequency = requested_clk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 136ae6d70c80..2f8fee05547a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -784,7 +784,6 @@ void rn_clk_mgr_construct( } else { struct clk_log_info log_info = {0}; - clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr); clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr); /* SMU Version 55.51.0 and up no longer have an issue diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 83ce55edb3aa..2a725a5fba40 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -735,6 +735,8 @@ static bool dc_construct(struct dc *dc, dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present; #endif + dc->debug.force_ignore_link_settings = init_params->force_ignore_link_settings; + if
(dc->res_pool->funcs->update_bw_bounding_box) dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); @@ -842,6 +844,60 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) dc_release_state(current_ctx); } +static void disable_vbios_mode_if_required( + struct dc *dc, + struct dc_state *context) +{ + unsigned int i, j; + + /* check if timing changed; if so, disable the stream */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = NULL; + struct dc_link *link = NULL; + struct pipe_ctx *pipe = NULL; + + pipe = &context->res_ctx.pipe_ctx[i]; + stream = pipe->stream; + if (stream == NULL) + continue; + + if (stream->link->local_sink && + stream->link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { + link = stream->link; + } + + if (link != NULL) { + unsigned int enc_inst, tg_inst = 0; + unsigned int pix_clk_100hz; + + enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); + if (enc_inst != ENGINE_ID_UNKNOWN) { + /* use a separate index here so the outer pipe loop is not clobbered */ + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (dc->res_pool->stream_enc[j]->id == enc_inst) { + tg_inst = dc->res_pool->stream_enc[j]->funcs->dig_source_otg( + dc->res_pool->stream_enc[j]); + break; + } + } + + dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz( + dc->res_pool->dp_clock_source, + tg_inst, &pix_clk_100hz); + + if (link->link_status.link_active) { + uint32_t requested_pix_clk_100hz = + pipe->stream_res.pix_clk_params.requested_pix_clk_100hz; + + if (pix_clk_100hz != requested_pix_clk_100hz) { + core_link_disable_stream(pipe); + pipe->stream->dpms_off = false; + } + } + } + } + } +} + static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) { int i; @@ -1278,15 +1334,17 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c for (i = 0; i < context->stream_count; i++) dc_streams[i] = context->streams[i]; - if (!dcb->funcs->is_accelerated_mode(dcb)) + if (!dcb->funcs->is_accelerated_mode(dcb)) { + disable_vbios_mode_if_required(dc, context); dc->hwss.enable_accelerated_mode(dc, context); + } - for (i = 0; i < context->stream_count; i++) { + for (i = 0; i < context->stream_count; i++) if (context->streams[i]->apply_seamless_boot_optimization) dc->optimize_seamless_boot_streams++; - } - if (context->stream_count > dc->optimize_seamless_boot_streams) + if (context->stream_count > dc->optimize_seamless_boot_streams || + context->stream_count == 0) dc->hwss.prepare_bandwidth(dc, context); disable_dangling_plane(dc, context); @@ -1368,7 +1426,8 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - if (context->stream_count > dc->optimize_seamless_boot_streams) { + if (context->stream_count > dc->optimize_seamless_boot_streams || + context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ wait_for_no_pipes_pending(dc, context); /* pplib is notified if disp_num changed */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index c026b393f3c5..2a9080400bdd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -177,7 +177,7 @@ static bool is_ycbcr709_limited_type( ret = true; return ret; } -enum dc_color_space_type get_color_space_type(enum dc_color_space color_space) +static enum dc_color_space_type get_color_space_type(enum dc_color_space
color_space) { enum dc_color_space_type type = COLOR_SPACE_RGB_TYPE; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 1871ff6119ae..fec87a2e210c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2441,7 +2441,7 @@ enum dc_status dc_link_validate_mode_timing( /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle */ - if (link->remote_sinks[0]) + if (link->remote_sinks[0] && link->remote_sinks[0]->sink_signal == SIGNAL_TYPE_VIRTUAL) return DC_OK; /* Passive Dongle */ @@ -2566,7 +2566,7 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, bool allow_active, bool link->psr_settings.psr_allow_active = allow_active; if (psr != NULL && link->psr_settings.psr_feature_enabled) - psr->funcs->psr_enable(psr, allow_active); + psr->funcs->psr_enable(psr, allow_active, wait); else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_settings.psr_feature_enabled) dmcu->funcs->set_psr_enable(dmcu, allow_active, wait); else diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index b984eecca58b..dec12de37642 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -148,14 +148,6 @@ static uint32_t dal_ddc_i2c_payloads_get_count(struct i2c_payloads *p) return p->payloads.count; } -static void dal_ddc_i2c_payloads_destroy(struct i2c_payloads *p) -{ - if (!p) - return; - - dal_vector_destruct(&p->payloads); -} - #define DDC_MIN(a, b) (((a) < (b)) ? (a) : (b)) void dal_ddc_i2c_payloads_add( @@ -582,7 +574,7 @@ bool dal_ddc_service_query_ddc_data( ddc->link, &command); - dal_ddc_i2c_payloads_destroy(&payloads); + dal_vector_destruct(&payloads.payloads); } return success; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index b9b66db8332b..ff1e9963ec7a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -49,6 +49,23 @@ static struct dc_link_settings get_common_supported_link_settings( struct dc_link_settings link_setting_a, struct dc_link_settings link_setting_b); +static uint32_t get_cr_training_aux_rd_interval(struct dc_link *link, + const struct dc_link_settings *link_settings) +{ + union training_aux_rd_interval training_rd_interval; + uint32_t wait_in_micro_secs = 100; + + memset(&training_rd_interval, 0, sizeof(training_rd_interval)); + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + return wait_in_micro_secs; +} + static uint32_t get_eq_training_aux_rd_interval( struct dc_link *link, const struct dc_link_settings *link_settings) @@ -1247,7 +1264,7 @@ static void initialize_training_settings( if (overrides->cr_pattern_time != NULL) lt_settings->cr_pattern_time = *overrides->cr_pattern_time; else - lt_settings->cr_pattern_time = 100; + lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); if (overrides->eq_pattern_time != NULL) lt_settings->eq_pattern_time = *overrides->eq_pattern_time; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 81c026319ccd..11a619befb42 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -229,6 +229,8 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) dp_receiver_power_ctrl(link, false); if (signal == SIGNAL_TYPE_EDP) { + if (link->dc->hwss.edp_backlight_control) + link->dc->hwss.edp_backlight_control(link, false); link->link_enc->funcs->disable_output(link->link_enc, signal); link->dc->hwss.edp_power_control(link, false); } else { @@ -491,13 +493,15 @@ void dp_set_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) OPTC_DSC_DISABLED, 0, 0); /* disable DSC in stream encoder */ - if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config( - pipe_ctx->stream_res.stream_enc, - OPTC_DSC_DISABLED, 0, 0); - - pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet( - pipe_ctx->stream_res.stream_enc, false, NULL); + if (dc_is_dp_signal(stream->signal)) { + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_config( + pipe_ctx->stream_res.stream_enc, + OPTC_DSC_DISABLED, 0, 0); + pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet( + pipe_ctx->stream_res.stream_enc, false, NULL); + } } /* disable DSC block */ @@ -534,7 +538,6 @@ out: bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable) { struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc; - struct dc *dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; if (!pipe_ctx->stream->timing.flags.DSC || !dsc) @@ -557,7 +560,7 @@ bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable) DC_LOG_DSC(" "); dsc->funcs->dsc_get_packed_pps(dsc, &dsc_cfg, &dsc_packed_pps[0]); - if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + if (dc_is_dp_signal(stream->signal)) { DC_LOG_DSC("Setting stream encoder DSC PPS SDP for engine %d\n", (int)pipe_ctx->stream_res.stream_enc->id); pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet( pipe_ctx->stream_res.stream_enc, @@ -566,7 +569,7 @@ bool dp_set_dsc_pps_sdp(struct pipe_ctx *pipe_ctx, bool enable) } } else { /* disable DSC PPS in stream encoder */ - if (dc_is_dp_signal(stream->signal) && !IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + if (dc_is_dp_signal(stream->signal)) { pipe_ctx->stream_res.stream_enc->funcs->dp_set_dsc_pps_info_packet( pipe_ctx->stream_res.stream_enc, false, NULL); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 4cea9344d8aa..e430148e47cf 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -785,14 +785,15 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx) /* * Only the leftmost ODM pipe should be offset by a nonzero distance */ - if (!pipe_ctx->prev_odm_pipe) + if (!pipe_ctx->prev_odm_pipe) { data->recout.x = stream->dst.x; - else - data->recout.x = 0; - if (stream->src.x < surf_clip.x) - data->recout.x += (surf_clip.x - stream->src.x) * stream->dst.width + if (stream->src.x < surf_clip.x) + data->recout.x += (surf_clip.x - stream->src.x) * stream->dst.width / stream->src.width; + } else + data->recout.x = 0; + data->recout.width = surf_clip.width * stream->dst.width / stream->src.width; if (data->recout.width + 
data->recout.x > stream->dst.x + stream->dst.width) data->recout.width = stream->dst.x + stream->dst.width - data->recout.x; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index d9b22d6a985a..82fe0ab56e3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.102" +#define DC_VER "3.2.104" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -503,6 +503,7 @@ struct dc_debug_options { bool usbc_combo_phy_reset_wa; bool disable_dsc; bool enable_dram_clock_change_one_display_vactive; + bool force_ignore_link_settings; }; struct dc_debug_data { @@ -660,6 +661,7 @@ struct dc_init_data { #if defined(CONFIG_DRM_AMD_DC_DCN3_0) bool force_smu_not_present; #endif + bool force_ignore_link_settings; }; struct dc_callback_init { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index e002ef706e1d..266b93a705d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -237,6 +237,8 @@ enum dc_detect_reason { DETECT_REASON_BOOT, DETECT_REASON_HPD, DETECT_REASON_HPDRX, + DETECT_REASON_FALLBACK, + DETECT_REASON_RETRAIN }; bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index df7f826eebd8..74f7619d4154 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -159,11 +159,15 @@ static uint32_t dce_panel_cntl_hw_init(struct panel_cntl *panel_cntl) static bool dce_is_panel_backlight_on(struct panel_cntl *panel_cntl) { struct dce_panel_cntl *dce_panel_cntl = TO_DCE_PANEL_CNTL(panel_cntl); - uint32_t value; + uint32_t blon, blon_ovrd, pwrseq_target_state; - REG_GET(PWRSEQ_CNTL, LVTMA_BLON, &value); + REG_GET_2(PWRSEQ_CNTL, LVTMA_BLON, &blon, LVTMA_BLON_OVRD, &blon_ovrd); + REG_GET(PWRSEQ_CNTL, LVTMA_PWRSEQ_TARGET_STATE, &pwrseq_target_state); - return value; + if (blon_ovrd) + return blon; + else + return pwrseq_target_state; } static bool dce_is_panel_powered_on(struct panel_cntl *panel_cntl) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h index 99c68ca9c7e0..6bd1196083a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.h @@ -54,15 +54,17 @@ SR(BL_PWM_CNTL2), \ SR(BL_PWM_PERIOD_CNTL), \ SR(BL_PWM_GRP1_REG_LOCK), \ - SR(BIOS_SCRATCH_2) + NBIO_SR(BIOS_SCRATCH_2) #define DCE_PANEL_CNTL_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix #define DCE_PANEL_CNTL_MASK_SH_LIST(mask_sh) \ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_BLON, mask_sh),\ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_BLON_OVRD, mask_sh),\ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON, mask_sh),\ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_DIGON_OVRD, mask_sh),\ + DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_CNTL, LVTMA_PWRSEQ_TARGET_STATE, mask_sh), \ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_STATE, LVTMA_PWRSEQ_TARGET_STATE_R, mask_sh), \ DCE_PANEL_CNTL_SF(LVTMA_PWRSEQ_REF_DIV, BL_PWM_REF_DIV, mask_sh), \ DCE_PANEL_CNTL_SF(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD, mask_sh), \ @@ -76,8 +78,10 @@ #define DCE_PANEL_CNTL_REG_FIELD_LIST(type) \ type LVTMA_BLON;\ + type LVTMA_BLON_OVRD;\ type LVTMA_DIGON;\ type LVTMA_DIGON_OVRD;\ + 
type LVTMA_PWRSEQ_TARGET_STATE; \ type LVTMA_PWRSEQ_TARGET_STATE_R; \ type BL_PWM_REF_DIV; \ type BL_PWM_EN; \ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 5167d6b8a48d..67af67ef2865 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -119,10 +119,11 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state * /** * Enable/Disable PSR. */ -static void dmub_psr_enable(struct dmub_psr *dmub, bool enable) +static void dmub_psr_enable(struct dmub_psr *dmub, bool enable, bool wait) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; + uint32_t retry_count, psr_state = 0; cmd.psr_enable.header.type = DMUB_CMD__PSR; @@ -136,6 +137,30 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable) dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); + + /* Below loops 1000 x 500us = 500 ms. + * Exit PSR may need to wait 1-2 frames to power up. Timeout after at + * least a few frames. Should never hit the max retry assert below. + */ + if (wait) { + for (retry_count = 0; retry_count <= 1000; retry_count++) { + dmub_psr_get_state(dmub, &psr_state); + + if (enable) { + if (psr_state != 0) + break; + } else { + if (psr_state == 0) + break; + } + + udelay(500); + } + + /* assert if max retry hit */ + if (retry_count >= 1000) + ASSERT(0); + } } /** @@ -231,10 +256,11 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->smu_optimizations_en = psr_context->allow_smu_optimizations; copy_settings_data->frame_delay = psr_context->frame_delay; copy_settings_data->frame_cap_ind = psr_context->psrFrameCaptureIndicationReq; + copy_settings_data->init_sdp_deadline = psr_context->sdpTransmitLineNumDeadline; + copy_settings_data->debug.u32All = 0; copy_settings_data->debug.bitfields.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ? 
true : false; - copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; - copy_settings_data->init_sdp_deadline = psr_context->sdpTransmitLineNumDeadline; + copy_settings_data->debug.bitfields.use_hw_lock_mgr = 1; dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h index f404fecd6410..dc121ed92d2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.h @@ -36,7 +36,7 @@ struct dmub_psr { struct dmub_psr_funcs { bool (*psr_copy_settings)(struct dmub_psr *dmub, struct dc_link *link, struct psr_context *psr_context); - void (*psr_enable)(struct dmub_psr *dmub, bool enable); + void (*psr_enable)(struct dmub_psr *dmub, bool enable, bool wait); void (*psr_get_state)(struct dmub_psr *dmub, uint32_t *psr_state); void (*psr_set_level)(struct dmub_psr *dmub, uint16_t psr_level); }; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 1002ce9979dc..3ac6c7b65a45 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1654,7 +1654,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) // enable fastboot if backend is enabled on eDP if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { /* Set optimization flag on eDP stream*/ - if (edp_stream) { + if (edp_stream && edp_link->link_status.link_active) { edp_stream->apply_edp_fast_boot_optimization = true; can_apply_edp_fast_boot = true; } @@ -2737,7 +2737,7 @@ static void program_output_csc(struct dc *dc, } } -void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) +static void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) { struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; struct input_pixel_processor *ipp = pipe_ctx->plane_res.ipp; @@ -2782,7 +2782,7 @@ void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) mi->funcs->set_cursor_position(mi, &pos_cpy, ¶m); } -void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) +static void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) { struct dc_cursor_attributes *attributes = &pipe_ctx->stream->cursor_attributes; @@ -2890,6 +2890,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .setup_stereo = NULL, .set_avmute = dce110_set_avmute, .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dce110_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index a1d1559bb5d7..b24c8ae8b1ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -66,6 +66,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_hw_state = dcn10_get_hw_state, .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dcn10_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 2972392f9788..800be2693fac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -288,6 +288,17 @@ void optc1_program_timing( if (optc1_is_two_pixels_per_containter(&patched_crtc_timing) || optc1->opp_count == 2) h_div = H_TIMING_DIV_BY2; + if (REG(OPTC_DATA_FORMAT_CONTROL)) { + uint32_t data_fmt = 0; + + if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) + data_fmt = 1; + else if (patched_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + data_fmt = 2; + + REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); + } + #if defined(CONFIG_DRM_AMD_DC_DCN3_0) if (optc1->tg_mask->OTG_H_TIMING_DIV_MODE != 0) { if (optc1->opp_count == 4) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 1abd81e17f09..a78712caf124 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -798,7 +798,7 @@ static const struct encoder_feature_support link_enc_feature = { .max_hdmi_deep_color = COLOR_DEPTH_121212, .max_hdmi_pixel_clock = 600000, .hdmi_ycbcr420_supported = true, - .dp_ycbcr420_supported = false, + .dp_ycbcr420_supported = true, .flags.bits.IS_HBR2_CAPABLE = true, .flags.bits.IS_HBR3_CAPABLE = true, .flags.bits.IS_TPS3_CAPABLE = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 9cf139be3f40..f70fcadf1ee5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -896,10 +896,10 @@ void enc1_stream_encoder_dp_blank( */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); /* Larger delay to wait until VBLANK - use max retry of - * 10us*5000=50ms. This covers 41.7ms of minimum 24 Hz mode + + * 10us*10020=100.2ms. This covers 100.0ms of minimum 10 Hz mode + * a little more because we may not trust delay accuracy.
*/ - max_retries = DP_BLANK_MAX_RETRY * 250; + max_retries = DP_BLANK_MAX_RETRY * 501; /* disable DP stream */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 966e1790b9bf..072193c5ffe6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -68,6 +68,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .get_hw_state = dcn10_get_hw_state, .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dcn10_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 8c16967fe018..d8b18c515d06 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -239,7 +239,6 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) / opp_cnt; uint32_t memory_mask; - uint32_t data_fmt = 0; ASSERT(opp_cnt == 2); @@ -262,13 +261,6 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c REG_SET(OPTC_MEMORY_CONFIG, 0, OPTC_MEM_SEL, memory_mask); - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - data_fmt = 1; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - data_fmt = 2; - - REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); - REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, OPTC_NUM_OF_INPUT_SEGMENT, 1, OPTC_SEG0_SRC_SEL, opp_id[0], diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 18b9465057ff..d50a9c370637 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -150,7 +150,6 @@ struct _vcs_dpi_ip_params_st dcn2_0_ip = { .dispclk_delay_subtotal = 87, // .dcfclk_cstate_latency = 10, // SRExitTime .max_inter_dcn_tile_repeaters = 8, - .xfc_supported = true, .xfc_fill_bw_overhead_percent = 10.0, .xfc_fill_constant_bytes = 0, @@ -298,8 +297,8 @@ static struct _vcs_dpi_soc_bounding_box_st dcn2_0_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 8.6, - .sr_enter_plus_exit_time_us = 10.9, + .sr_exit_time_us = 11.6, + .sr_enter_plus_exit_time_us = 13.9, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, @@ -2203,9 +2202,9 @@ int dcn20_populate_dml_pipes_from_context( /* todo: default max for now, until there is logic reflecting this in dc*/ pipes[pipe_cnt].dout.output_bpc = 12; #if defined(CONFIG_DRM_AMD_DC_DCN3_0) - /*fill up the audio sample rate*/ + /*fill up the audio sample rate (unit in kHz)*/ get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); - pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate; + pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; #endif /* * For graphic plane, cursor number is 1, nv12 is 0 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 2ba880c3943c..2b7396c9fcb4 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -69,6 +69,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .get_hw_state = dcn10_get_hw_state, .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dcn10_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 025637a83c3b..bd2a068f9863 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -31,9 +31,21 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ dcn30_dio_link_encoder.o dcn30_resource.o -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -mpreferred-stack-boundary=4 - +ifdef CONFIG_X86 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec +endif + +ifdef CONFIG_ARM64 +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -45,8 +57,10 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 endif AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 19daa456e3bf..7c90c2222506 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -69,6 +69,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .get_hw_state = dcn10_get_hw_state, .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, + .edp_backlight_control = dce110_edp_backlight_control, .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dcn10_set_cursor_position, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 6d13431ff693..b1f228fc119a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -209,7 +209,6 @@ static void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, in int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) / opp_cnt; uint32_t memory_mask = 0; - uint32_t data_fmt = 0; /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); @@ -240,13 +239,6 @@ static void optc3_set_odm_combine(struct timing_generator *optc, int 
*opp_id, in REG_SET(OPTC_MEMORY_CONFIG, 0, OPTC_MEM_SEL, memory_mask); - if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) - data_fmt = 1; - else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - data_fmt = 2; - - REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); - if (opp_cnt == 2) { REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, OPTC_NUM_OF_INPUT_SEGMENT, 1, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index dde87baf1370..24fb39a11e5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1899,6 +1899,48 @@ static bool dcn30_split_stream_for_mpc_or_odm( return true; } +static struct pipe_ctx *dcn30_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + static bool dcn30_internal_validate_bw( struct dc *dc, struct dc_state *context, @@ -2024,6 +2066,7 @@ static bool dcn30_internal_validate_bw( dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { struct pipe_ctx *top_pipe = pipe->top_pipe; struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; @@ -2038,6 +2081,7 @@ static bool dcn30_internal_validate_bw( pipe->stream = NULL; memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; } else ASSERT(0); /* Should never try to merge master pipe */ @@ -2045,8 +2089,10 @@ static bool dcn30_internal_validate_bw( for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; struct pipe_ctx *hsplit_pipe = NULL; bool odm; + int old_index = -1; if (!pipe->stream || newly_split[i]) continue; @@ -2058,7 +2104,20 @@ static bool dcn30_internal_validate_bw( continue; if (split[i]) { - hsplit_pipe = find_idle_secondary_pipe(&context->res_ctx, dc->res_pool, pipe); + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = 
old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn30_find_split_pipe(dc, context, old_index); ASSERT(hsplit_pipe); if (!hsplit_pipe) goto validate_fail; @@ -2072,8 +2131,16 @@ static bool dcn30_internal_validate_bw( repopulate_pipes = true; } if (split[i] == 4) { - struct pipe_ctx *pipe_4to1 = find_idle_secondary_pipe(&context->res_ctx, dc->res_pool, pipe); + struct pipe_ctx *pipe_4to1; + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); ASSERT(pipe_4to1); if (!pipe_4to1) goto validate_fail; @@ -2083,7 +2150,14 @@ static bool dcn30_internal_validate_bw( goto validate_fail; newly_split[pipe_4to1->pipe_idx] = true; - pipe_4to1 = find_idle_secondary_pipe(&context->res_ctx, dc->res_pool, pipe); + if (odm && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn30_find_split_pipe(dc, context, old_index); ASSERT(pipe_4to1); if (!pipe_4to1) goto validate_fail; @@ -2127,7 +2201,7 @@ validate_out: return out; } -static void dcn30_calculate_wm( +void dcn30_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, @@ -2135,6 +2209,8 @@ static void dcn30_calculate_wm( { int i, pipe_idx; double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) dcfclk = context->bw_ctx.dml.soc.min_dcfclk; @@ -2168,30 +2244,12 @@ static void dcn30_calculate_wm( pipes[0].clks_cfg.voltage = vlevel; pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - /* Set C: - * DCFCLK: Min Required - * FCLK(proportional to UCLK): 1GHz or Max - * pstate latency overriden to 5us - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - 
context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - /* Set D: * DCFCLK: Min Required * FCLK(proportional to UCLK): 1GHz or Max * sr_enter_exit = 4, sr_exit = 2us */ + /* if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; @@ -2205,29 +2263,72 @@ static void dcn30_calculate_wm( context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + */ - /* Set A: + /* Set C: * DCFCLK: Min Required * FCLK(proportional to UCLK): 1GHz or Max - * - * Set A calculated last so that following calculations are based on Set A + * pstate latency overridden to 5us */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin = 160; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_unsupported) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + for (i = 3; i > 0; i--) { + if ((min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) && + (min_dram_speed_mts - min_dram_speed_mts_margin < dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts)) + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + } + + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; } - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - 
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - context->perf_params.stutter_period_us = - context->bw_ctx.dml.vba.StutterPeriod; + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if (!pstate_en) { + /* The only difference between A and C is p-state latency, if p-state is not supported we want to + * calculate DLG based on dummy p-state latency, and max out the set A p-state watermark + */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0x13FFFF; + } else { + /* Set A: + * DCFCLK: Min Required + * FCLK(proportional to UCLK): 1GHz or Max + * + * Set A calculated last so that following calculations are based on Set A + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + 
context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + /* Make set D = set A until set D is enabled */ + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) @@ -2247,6 +2348,13 @@ static void dcn30_calculate_wm( pipe_idx++; } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; } bool dcn30_validate_bandwidth(struct dc *dc, @@ -2279,8 +2387,7 @@ bool dcn30_validate_bandwidth(struct dc *dc, goto validate_out; } - dcn30_calculate_wm(dc, context, pipes, pipe_cnt, vlevel); - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + dc->res_pool->funcs->calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); BW_VAL_TRACE_END_WATERMARKS(); @@ -2448,6 +2555,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .link_enc_create = dcn30_link_encoder_create, .panel_cntl_create = dcn30_panel_cntl_create, .validate_bandwidth = dcn30_validate_bandwidth, + .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index c9d5f94092a0..d163812af858 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -55,6 +55,11 @@ unsigned int dcn30_calc_max_scaled_time( bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate); +void dcn30_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); void dcn30_populate_dml_writeback_from_context( struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 50b7d011705d..9e0ae18e71fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -5558,7 +5558,7 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( } } - if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) { + if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { *DRAMClockChangeSupport = dm_dram_clock_change_vactive; } else if (((mode_lib->vba.SynchronizedVBlank == true || 
mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) { *DRAMClockChangeSupport = dm_dram_clock_change_vblank; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 1daa563c8ff4..6e6bc66e49f0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -101,7 +101,7 @@ struct resource_funcs { struct dc *dc, struct dc_state *context, bool fast_validate); - void (*calculate_wm)( + void (*calculate_wm_and_dlg)( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c index f0a0d419e555..1053b165c139 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c @@ -99,6 +99,12 @@ static void virtual_setup_stereo_sync( bool enable) {} +static void virtual_stream_encoder_set_dsc_pps_info_packet( + struct stream_encoder *enc, + bool enable, + uint8_t *dsc_packed_pps) +{} + static const struct stream_encoder_funcs virtual_str_enc_funcs = { .dp_set_odm_combine = virtual_enc_dp_set_odm_combine, @@ -128,6 +134,7 @@ static const struct stream_encoder_funcs virtual_str_enc_funcs = { .hdmi_reset_stream_attribute = virtual_stream_encoder_reset_hdmi_stream_attribute, .dig_connect_to_otg = virtual_dig_connect_to_otg, .setup_stereo_sync = virtual_setup_stereo_sync, + .dp_set_dsc_pps_info_packet = virtual_stream_encoder_set_dsc_pps_info_packet, }; bool virtual_stream_encoder_construct( diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index f74c7fabd0a9..d103ec1eaa73 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -36,10 +36,10 @@ /* Firmware versioning. */ #ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x82f998da6 +#define DMUB_FW_VERSION_GIT_HASH 0x9cf8f05fe #define DMUB_FW_VERSION_MAJOR 0 #define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 32 +#define DMUB_FW_VERSION_REVISION 35 #define DMUB_FW_VERSION_TEST 0 #define DMUB_FW_VERSION_VBIOS 0 #define DMUB_FW_VERSION_HOTFIX 0 @@ -57,6 +57,7 @@ #define SET_ABM_PIPE_GRADUALLY_DISABLE 0 #define SET_ABM_PIPE_IMMEDIATELY_DISABLE 255 +#define SET_ABM_PIPE_IMMEDIATE_KEEP_GAIN_DISABLE 254 #define SET_ABM_PIPE_NORMAL 1 /* Maximum number of streams on any ASIC. 
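An editorial aside on the core_types.h rename a few hunks above: folding the DLG calculation into the watermark hook means dcn30_validate_bandwidth can stay generation-agnostic and simply dispatch through the resource_funcs table, as the dcn30_resource.c hunk does with dc->res_pool->funcs->calculate_wm_and_dlg(...). A minimal sketch of that dispatch pattern follows, with simplified stand-in types rather than the driver's real signatures:

/* Simplified stand-ins; the real hook takes dc, context, pipes, pipe_cnt, vlevel. */
struct sketch_res_funcs {
	void (*calculate_wm_and_dlg)(void *ctx, int vlevel);
};

static void dcn30_wm_and_dlg_sketch(void *ctx, int vlevel)
{
	/* per-ASIC watermark + DLG math would live here */
}

static const struct sketch_res_funcs dcn30_sketch = {
	.calculate_wm_and_dlg = dcn30_wm_and_dlg_sketch,
};

static void validate_bandwidth_sketch(const struct sketch_res_funcs *funcs,
				      void *ctx, int vlevel)
{
	/* common validation never needs to know which DCN generation is running */
	funcs->calculate_wm_and_dlg(ctx, vlevel);
}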
*/ @@ -69,10 +70,6 @@ #define PHYSICAL_ADDRESS_LOC union large_integer #endif -#if defined(__cplusplus) -extern "C" { -#endif - #ifndef dmub_memcpy #define dmub_memcpy(dest, source, bytes) memcpy((dest), (source), (bytes)) #endif @@ -81,6 +78,10 @@ extern "C" { #define dmub_memset(dest, val, bytes) memset((dest), (val), (bytes)) #endif +#if defined(__cplusplus) +extern "C" { +#endif + #ifndef dmub_udelay #define dmub_udelay(microseconds) udelay(microseconds) #endif @@ -170,7 +171,7 @@ union dmub_fw_boot_status { uint32_t dal_fw : 1; uint32_t mailbox_rdy : 1; uint32_t optimized_init_done : 1; - uint32_t reserved : 29; + uint32_t restore_required : 1; } bits; uint32_t all; }; @@ -179,6 +180,7 @@ enum dmub_fw_boot_status_bit { DMUB_FW_BOOT_STATUS_BIT_DAL_FIRMWARE = (1 << 0), DMUB_FW_BOOT_STATUS_BIT_MAILBOX_READY = (1 << 1), DMUB_FW_BOOT_STATUS_BIT_OPTIMIZED_INIT_DONE = (1 << 2), + DMUB_FW_BOOT_STATUS_BIT_RESTORE_REQUIRED = (1 << 3), }; /* Register bit definition for SCRATCH15 */ @@ -298,9 +300,17 @@ enum dmub_cmd_type { DMUB_CMD__PSR = 64, DMUB_CMD__ABM = 66, DMUB_CMD__HW_LOCK = 69, + DMUB_CMD__DP_AUX_ACCESS = 70, + DMUB_CMD__OUTBOX1_ENABLE = 71, DMUB_CMD__VBIOS = 128, }; +enum dmub_out_cmd_type { + DMUB_OUT_CMD__NULL = 0, + DMUB_OUT_CMD__DP_AUX_REPLY = 1, + DMUB_OUT_CMD__DP_HPD_NOTIFY = 2, +}; + #pragma pack(push, 1) struct dmub_cmd_header { @@ -456,6 +466,78 @@ struct dmub_rb_cmd_dpphy_init { uint8_t reserved[60]; }; +enum dp_aux_request_action { + DP_AUX_REQ_ACTION_I2C_WRITE = 0x00, + DP_AUX_REQ_ACTION_I2C_READ = 0x10, + DP_AUX_REQ_ACTION_I2C_STATUS_REQ = 0x20, + DP_AUX_REQ_ACTION_I2C_WRITE_MOT = 0x40, + DP_AUX_REQ_ACTION_I2C_READ_MOT = 0x50, + DP_AUX_REQ_ACTION_I2C_STATUS_REQ_MOT = 0x60, + DP_AUX_REQ_ACTION_DPCD_WRITE = 0x80, + DP_AUX_REQ_ACTION_DPCD_READ = 0x90 +}; + +/* DP AUX command */ +struct aux_transaction_parameters { + uint8_t is_i2c_over_aux; + uint8_t action; + uint8_t length; + uint8_t pad; + uint32_t address; + uint8_t data[16]; +}; + +struct dmub_cmd_dp_aux_control_data { + uint32_t handle; + uint8_t port_index; + uint8_t sw_crc_enabled; + uint16_t timeout; + struct aux_transaction_parameters dpaux; +}; + +struct dmub_rb_cmd_dp_aux_access { + struct dmub_cmd_header header; + struct dmub_cmd_dp_aux_control_data aux_control; +}; + +struct dmub_rb_cmd_outbox1_enable { + struct dmub_cmd_header header; + uint32_t enable; +}; + +/* DP AUX Reply command - OutBox Cmd */ +struct aux_reply_data { + uint8_t command; + uint8_t length; + uint8_t pad[2]; + uint8_t data[16]; +}; + +struct aux_reply_control_data { + uint32_t handle; + uint8_t phy_port_index; + uint8_t result; + uint16_t pad; +}; + +struct dmub_rb_cmd_dp_aux_reply { + struct dmub_cmd_header header; + struct aux_reply_control_data control; + struct aux_reply_data reply_data; +}; + +struct dp_hpd_data { + uint8_t phy_port_index; + uint8_t hpd_type; + uint8_t hpd_status; + uint8_t pad; +}; + +struct dmub_rb_cmd_dp_hpd_notify { + struct dmub_cmd_header header; + struct dp_hpd_data hpd_data; +}; + /* * Command IDs should be treated as stable ABI. * Do not reuse or modify IDs. 
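The new DP AUX structures above give the driver a way to hand AUX transactions to the DMUB firmware. Below is a hedged sketch of filling one for a native-AUX DPCD read; the field names come straight from the hunk, but the dmub_cmd_header member used to tag the command type is an assumption, since the header's fields are not shown here:

static void sketch_fill_dpcd_read(struct dmub_rb_cmd_dp_aux_access *cmd,
				  uint32_t dpcd_address, uint8_t num_bytes)
{
	dmub_memset(cmd, 0, sizeof(*cmd));
	/* 'type' is an assumed header member tagging the command */
	cmd->header.type = DMUB_CMD__DP_AUX_ACCESS;
	cmd->aux_control.port_index = 0;            /* PHY port to drive */
	cmd->aux_control.sw_crc_enabled = 0;
	cmd->aux_control.timeout = 0;               /* let firmware pick */
	cmd->aux_control.dpaux.is_i2c_over_aux = 0; /* native AUX, not I2C-over-AUX */
	cmd->aux_control.dpaux.action = DP_AUX_REQ_ACTION_DPCD_READ;
	cmd->aux_control.dpaux.address = dpcd_address;
	cmd->aux_control.dpaux.length = num_bytes;  /* data[] caps this at 16 */
}

The firmware's answer then comes back asynchronously as a DMUB_OUT_CMD__DP_AUX_REPLY outbox command carrying an aux_reply_data payload, per the reply structures defined above.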
@@ -685,8 +767,15 @@ union dmub_rb_cmd { struct dmub_rb_cmd_abm_set_ambient_level abm_set_ambient_level; struct dmub_rb_cmd_abm_set_pwm_frac abm_set_pwm_frac; struct dmub_rb_cmd_abm_init_config abm_init_config; + struct dmub_rb_cmd_dp_aux_access dp_aux_access; + struct dmub_rb_cmd_outbox1_enable outbox1_enable; }; +union dmub_rb_out_cmd { + struct dmub_rb_cmd_common cmd_common; + struct dmub_rb_cmd_dp_aux_reply dp_aux_reply; + struct dmub_rb_cmd_dp_hpd_notify dp_hpd_notify; +}; #pragma pack(pop) @@ -759,6 +848,25 @@ static inline bool dmub_rb_push_front(struct dmub_rb *rb, return true; } +static inline bool dmub_rb_out_push_front(struct dmub_rb *rb, + const union dmub_rb_out_cmd *cmd) +{ + uint8_t *dst = (uint8_t *)(rb->base_address) + rb->wrpt; + const uint8_t *src = (uint8_t *)cmd; + + if (dmub_rb_full(rb)) + return false; + + dmub_memcpy(dst, src, DMUB_RB_CMD_SIZE); + + rb->wrpt += DMUB_RB_CMD_SIZE; + + if (rb->wrpt >= rb->capacity) + rb->wrpt %= rb->capacity; + + return true; +} + static inline bool dmub_rb_front(struct dmub_rb *rb, union dmub_rb_cmd *cmd) { @@ -772,6 +880,23 @@ static inline bool dmub_rb_front(struct dmub_rb *rb, return true; } +static inline bool dmub_rb_out_front(struct dmub_rb *rb, + union dmub_rb_out_cmd *cmd) +{ + const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t); + uint64_t *dst = (uint64_t *)cmd; + int i; + + if (dmub_rb_empty(rb)) + return false; + + // copying data + for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) + *dst++ = *src++; + + return true; +} + static inline bool dmub_rb_pop_front(struct dmub_rb *rb) { if (dmub_rb_empty(rb)) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index e98c84ef206f..10dc481ecbc4 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -47,6 +47,40 @@ enum amd_apu_flags { AMD_APU_IS_RENOIR = 0x00000008UL, }; +/** +* DOC: IP Blocks +* +* GPUs are composed of IP (intellectual property) blocks. These +* IP blocks provide various functionalities: display, graphics, +* video decode, etc. The IP blocks that comprise a particular GPU +* are listed in the GPU's respective SoC file. amdgpu_device.c +* acquires the list of IP blocks for the GPU in use on initialization. +* It can then operate on this list to perform standard driver operations +* such as: init, fini, suspend, resume, etc. +* +* +* IP block implementations are named using the following convention: +* <functionality>_v<version> (E.g.: gfx_v6_0). +*/ + +/** +* enum amd_ip_block_type - Used to classify IP blocks by functionality. 
+* +* @AMD_IP_BLOCK_TYPE_COMMON: GPU Family +* @AMD_IP_BLOCK_TYPE_GMC: Graphics Memory Controller +* @AMD_IP_BLOCK_TYPE_IH: Interrupt Handler +* @AMD_IP_BLOCK_TYPE_SMC: System Management Controller +* @AMD_IP_BLOCK_TYPE_PSP: Platform Security Processor +* @AMD_IP_BLOCK_TYPE_DCE: Display and Compositing Engine +* @AMD_IP_BLOCK_TYPE_GFX: Graphics and Compute Engine +* @AMD_IP_BLOCK_TYPE_SDMA: System DMA Engine +* @AMD_IP_BLOCK_TYPE_UVD: Unified Video Decoder +* @AMD_IP_BLOCK_TYPE_VCE: Video Compression Engine +* @AMD_IP_BLOCK_TYPE_ACP: Audio Co-Processor +* @AMD_IP_BLOCK_TYPE_VCN: Video Core/Codec Next +* @AMD_IP_BLOCK_TYPE_MES: Micro-Engine Scheduler +* @AMD_IP_BLOCK_TYPE_JPEG: JPEG Engine +*/ enum amd_ip_block_type { AMD_IP_BLOCK_TYPE_COMMON, AMD_IP_BLOCK_TYPE_GMC, @@ -128,6 +162,34 @@ enum amd_powergating_state { #define AMD_PG_SUPPORT_ATHUB (1 << 16) #define AMD_PG_SUPPORT_JPEG (1 << 17) +/** + * enum PP_FEATURE_MASK - Used to mask power play features. + * + * @PP_SCLK_DPM_MASK: Dynamic adjustment of the system (graphics) clock. + * @PP_MCLK_DPM_MASK: Dynamic adjustment of the memory clock. + * @PP_PCIE_DPM_MASK: Dynamic adjustment of PCIE clocks and lanes. + * @PP_SCLK_DEEP_SLEEP_MASK: System (graphics) clock deep sleep. + * @PP_POWER_CONTAINMENT_MASK: Power containment. + * @PP_UVD_HANDSHAKE_MASK: Unified video decoder handshake. + * @PP_SMC_VOLTAGE_CONTROL_MASK: Dynamic voltage control. + * @PP_VBI_TIME_SUPPORT_MASK: Vertical blank interval support. + * @PP_ULV_MASK: Ultra low voltage. + * @PP_ENABLE_GFX_CG_THRU_SMU: SMU control of GFX engine clockgating. + * @PP_CLOCK_STRETCH_MASK: Clock stretching. + * @PP_OD_FUZZY_FAN_CONTROL_MASK: Overdrive fuzzy fan control. + * @PP_SOCCLK_DPM_MASK: Dynamic adjustment of the SoC clock. + * @PP_DCEFCLK_DPM_MASK: Dynamic adjustment of the Display Controller Engine Fabric clock. + * @PP_OVERDRIVE_MASK: Over- and under-clocking support. + * @PP_GFXOFF_MASK: Dynamic graphics engine power control. + * @PP_ACG_MASK: Adaptive clock generator. + * @PP_STUTTER_MODE: Stutter mode. + * @PP_AVFS_MASK: Adaptive voltage and frequency scaling. + * + * To override these settings on boot, append amdgpu.ppfeaturemask=<mask> to + * the kernel's command line parameters. This is usually done through a system's + * boot loader (E.g. GRUB). If manually loading the driver, pass + * ppfeaturemask=<mask> as a modprobe parameter. 
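As the kernel-doc above notes, the whole feature set can be overridden at boot with amdgpu.ppfeaturemask=<mask>. A short sketch of deriving such a mask in C (the enum's remaining bit values are elided by this hunk, so PP_GFXOFF_MASK's numeric value is assumed from the full amd_shared.h):

/* Assumes the full PP_FEATURE_MASK enum from amd_shared.h is visible. */
static unsigned int sketch_ppfeaturemask_without_gfxoff(void)
{
	unsigned int mask = 0xffffffffu;        /* start from "everything enabled" */

	mask &= ~(unsigned int)PP_GFXOFF_MASK;  /* drop dynamic GFX power control */
	return mask;  /* pass as amdgpu.ppfeaturemask=0x... on the command line */
}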
+ */ enum PP_FEATURE_MASK { PP_SCLK_DPM_MASK = 0x1, PP_MCLK_DPM_MASK = 0x2, @@ -165,56 +227,59 @@ enum DC_DEBUG_MASK { }; enum amd_dpm_forced_level; + /** * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks + * @name: Name of IP block + * @early_init: sets up early driver state (pre sw_init), + * does not configure hw - Optional + * @late_init: sets up late driver/hw state (post hw_init) - Optional + * @sw_init: sets up driver state, does not configure hw + * @sw_fini: tears down driver state, does not configure hw + * @hw_init: sets up the hw state + * @hw_fini: tears down the hw state + * @late_fini: final cleanup + * @suspend: handles IP specific hw/sw changes for suspend + * @resume: handles IP specific hw/sw changes for resume + * @is_idle: returns current IP block idle status + * @wait_for_idle: poll for idle + * @check_soft_reset: check soft reset the IP block + * @pre_soft_reset: pre soft reset the IP block + * @soft_reset: soft reset the IP block + * @post_soft_reset: post soft reset the IP block + * @set_clockgating_state: enable/disable cg for the IP block + * @set_powergating_state: enable/disable pg for the IP block + * @get_clockgating_state: get current clockgating status + * @enable_umd_pstate: enable UMD powerstate + * + * These hooks provide an interface for controlling the operational state + * of IP blocks. After acquiring a list of IP blocks for the GPU in use, + * the driver can make chip-wide state changes by walking this list and + * making calls to hooks from each IP block. This list is ordered to ensure + * that the driver initializes the IP blocks in a safe sequence. */ struct amd_ip_funcs { - /** @name: Name of IP block */ char *name; - /** - * @early_init: - * - * sets up early driver state (pre sw_init), - * does not configure hw - Optional - */ int (*early_init)(void *handle); - /** @late_init: sets up late driver/hw state (post hw_init) - Optional */ int (*late_init)(void *handle); - /** @sw_init: sets up driver state, does not configure hw */ int (*sw_init)(void *handle); - /** @sw_fini: tears down driver state, does not configure hw */ int (*sw_fini)(void *handle); - /** @hw_init: sets up the hw state */ int (*hw_init)(void *handle); - /** @hw_fini: tears down the hw state */ int (*hw_fini)(void *handle); - /** @late_fini: final cleanup */ void (*late_fini)(void *handle); - /** @suspend: handles IP specific hw/sw changes for suspend */ int (*suspend)(void *handle); - /** @resume: handles IP specific hw/sw changes for resume */ int (*resume)(void *handle); - /** @is_idle: returns current IP block idle status */ bool (*is_idle)(void *handle); - /** @wait_for_idle: poll for idle */ int (*wait_for_idle)(void *handle); - /** @check_soft_reset: check soft reset the IP block */ bool (*check_soft_reset)(void *handle); - /** @pre_soft_reset: pre soft reset the IP block */ int (*pre_soft_reset)(void *handle); - /** @soft_reset: soft reset the IP block */ int (*soft_reset)(void *handle); - /** @post_soft_reset: post soft reset the IP block */ int (*post_soft_reset)(void *handle); - /** @set_clockgating_state: enable/disable cg for the IP block */ int (*set_clockgating_state)(void *handle, enum amd_clockgating_state state); - /** @set_powergating_state: enable/disable pg for the IP block */ int (*set_powergating_state)(void *handle, enum amd_powergating_state state); - /** @get_clockgating_state: get current clockgating status */ void (*get_clockgating_state)(void *handle, u32 *flags); - /** @enable_umd_pstate: enable UMD powerstate */ int 
(*enable_umd_pstate)(void *handle, enum amd_dpm_forced_level *level); }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h index f41556abfbbc..629a8a3b55e9 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_4_1_offset.h @@ -205,6 +205,8 @@ #define mmGCEA_EDC_CNT2_BASE_IDX 0 #define mmGCEA_EDC_CNT3 0x071b #define mmGCEA_EDC_CNT3_BASE_IDX 0 +#define mmGCEA_ERR_STATUS 0x0712 +#define mmGCEA_ERR_STATUS_BASE_IDX 0 // addressBlock: gc_gfxudec // base address: 0x30000 @@ -261,4 +263,4 @@ #define mmRLC_EDC_CNT2 0x4d41 #define mmRLC_EDC_CNT2_BASE_IDX 1 -#endif
\ No newline at end of file +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index fc592f60e6a0..e37b4b9f626d 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -212,6 +212,15 @@ struct tile_config { * IH ring entry. This function allows the KFD ISR to get the VMID * from the fault status register as early as possible. * + * @get_cu_occupancy: Function pointer that returns to the caller the number + * of wave fronts in flight for all of the queues of a process, as identified + * by its pasid. Note that the returned value is only a snapshot of the + * current moment and guarantees no minimum number of waves in flight. This + * function is defined only for devices of the GFX9 and later GFX families; + * it must not be called for GFX8 and earlier devices, where it is left + * undefined. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -286,6 +295,9 @@ struct kfd2kgd_calls { void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); + + void (*get_cu_occupancy)(struct kgd_dev *kgd, int pasid, int *wave_cnt, + int *max_waves_per_cu); }; #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 0aec28fda058..94132c70d7af 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -281,6 +281,7 @@ struct amd_pm_funcs { int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit); int (*get_power_profile_mode)(void *handle, char *buf); int (*set_power_profile_mode)(void *handle, long *input, uint32_t size); + int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size); int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size); int (*set_mp1_state)(void *handle, enum pp_mp1_state mp1_state); int (*smu_i2c_bus_access)(void *handle, bool acquire); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 2d924e88a215..529816637c73 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -827,6 +827,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, return -EINVAL; } } else { + + if (adev->powerplay.pp_funcs->set_fine_grain_clk_vol) { + ret = amdgpu_dpm_set_fine_grain_clk_vol(adev, type, + parameter, + parameter_size); + if (ret) { + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + return -EINVAL; + } + } + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index dff4a5f99bb0..f6e0e7d8a007 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -349,6 +349,10 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_power_profile_mode(\ (adev)->powerplay.pp_handle, parameter, size)) +#define amdgpu_dpm_set_fine_grain_clk_vol(adev, type, parameter, size) \ + ((adev)->powerplay.pp_funcs->set_fine_grain_clk_vol(\ + (adev)->powerplay.pp_handle, type, parameter,
size)) + #define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \ ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\ (adev)->powerplay.pp_handle, type, parameter, size)) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 85c5e8627e3b..44fd0cd069de 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -453,6 +453,7 @@ struct smu_context struct work_struct throttling_logging_work; atomic64_t throttle_int_counter; + struct work_struct interrupt_work; unsigned fan_max_rpm; unsigned manual_fan_speed_rpm; @@ -601,6 +602,7 @@ struct pptable_funcs { int (*deep_sleep_control)(struct smu_context *smu, bool enablement); int (*get_fan_parameters)(struct smu_context *smu); int (*post_init)(struct smu_context *smu); + void (*interrupt_work)(struct smu_context *smu); }; typedef enum { diff --git a/drivers/gpu/drm/amd/pm/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/inc/hwmgr.h index 1b3529efc91e..3898a95ec28b 100644 --- a/drivers/gpu/drm/amd/pm/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/inc/hwmgr.h @@ -340,6 +340,9 @@ struct pp_hwmgr_func { int (*odn_edit_dpm_table)(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size); + int (*set_fine_grain_clk_vol)(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size); int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n); int (*powergate_mmhub)(struct pp_hwmgr *hwmgr); int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr); @@ -347,6 +350,8 @@ struct pp_hwmgr_func { int (*enable_mgpu_fan_boost)(struct pp_hwmgr *hwmgr); int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_hard_min_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_soft_max_gfxclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap); int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state); int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state); diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h index 11a6cf96fe0c..1275246769d9 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_sienna_cichlid.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define SMU11_DRIVER_IF_VERSION 0x37 +#define SMU11_DRIVER_IF_VERSION 0x39 #define PPTABLE_Sienna_Cichlid_SMU_VERSION 6 @@ -962,7 +962,7 @@ typedef struct { uint8_t FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS]; uint8_t FanLinearTempPoints[NUM_OD_FAN_MAX_POINTS]; uint16_t MaxOpTemp; // Degree Celcius - uint16_t Padding_16[1]; + int16_t VddGfxOffset; // in mV uint8_t FanZeroRpmEnable; uint8_t FanZeroRpmStopTemp; uint8_t FanMode; diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 7ae83df83edb..2d1c3babaa3a 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -30,7 +30,7 @@ #define SMU11_DRIVER_IF_VERSION_NV10 0x36 #define SMU11_DRIVER_IF_VERSION_NV12 0x36 #define SMU11_DRIVER_IF_VERSION_NV14 0x36 -#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x37 +#define SMU11_DRIVER_IF_VERSION_Sienna_Cichlid 0x39 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0x5 /* 
MP Apertures */ @@ -280,5 +280,7 @@ int smu_v11_0_gfx_ulv_control(struct smu_context *smu, int smu_v11_0_deep_sleep_control(struct smu_context *smu, bool enablement); +void smu_v11_0_interrupt_work(struct smu_context *smu); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a6321f2063c1..eab9768029c1 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -911,6 +911,19 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size) return ret; } +static int pp_set_fine_grain_clk_vol(void *handle, uint32_t type, long *input, uint32_t size) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_fine_grain_clk_vol == NULL) + return 0; + + return hwmgr->hwmgr_func->set_fine_grain_clk_vol(hwmgr, type, input, size); +} + static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size) { struct pp_hwmgr *hwmgr = handle; @@ -920,7 +933,7 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3 if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) { pr_info_ratelimited("%s was not implemented.\n", __func__); - return -EINVAL; + return 0; } return hwmgr->hwmgr_func->odn_edit_dpm_table(hwmgr, type, input, size); @@ -1645,6 +1658,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .set_powergating_by_smu = pp_set_powergating_by_smu, .get_power_profile_mode = pp_get_power_profile_mode, .set_power_profile_mode = pp_set_power_profile_mode, + .set_fine_grain_clk_vol = pp_set_fine_grain_clk_vol, .odn_edit_dpm_table = pp_odn_edit_dpm_table, .set_mp1_state = pp_dpm_set_mp1_state, .set_power_limit = pp_set_power_limit, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index a5d1a32ab160..cf60f3992303 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -242,6 +242,34 @@ static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cloc return 0; } +static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (clock && smu10_data->gfx_actual_soft_min_freq != clock) { + smu10_data->gfx_actual_soft_min_freq = clock; + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + smu10_data->gfx_actual_soft_min_freq, + NULL); + } + return 0; +} + +static int smu10_set_soft_max_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (clock && smu10_data->gfx_max_freq_limit != (clock * 100)) { + smu10_data->gfx_max_freq_limit = clock * 100; + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + clock, + NULL); + } + return 0; +} + static int smu10_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); @@ -527,6 +555,9 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100; hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100; + /* enable the pp_od_clk_voltage sysfs file */ + hwmgr->od_enabled = 1; + return result; } @@ -563,6 +594,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, struct smu10_hwmgr 
*data = hwmgr->backend; uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; + uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; + uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -676,13 +709,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - SMU10_UMD_PSTATE_PEAK_FCLK : + data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - SMU10_UMD_PSTATE_MIN_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -695,11 +728,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_PEAK_FCLK, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - SMU10_UMD_PSTATE_PEAK_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, @@ -947,6 +980,26 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, ((mclk_table->entries[i].clk / 100) == now) ? "*" : ""); break; + case OD_SCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_SCLK"); + + size += sprintf(buf + size, "0: %10uMhz\n", + (data->gfx_actual_soft_min_freq > 0) ? 
data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100); + size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100); + } + break; + case OD_RANGE: + if (hwmgr->od_enabled) { + uint32_t min_freq, max_freq = 0; + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + + size = sprintf(buf, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + min_freq, max_freq); + } + break; default: break; } @@ -1359,6 +1412,32 @@ static int smu10_asic_reset(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mod NULL); } +static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) +{ + if (!hwmgr->od_enabled) { + pr_err("Fine grain not supported\n"); + return -EINVAL; + } + + if (size != 2) { + pr_err("Incorrect number of input parameters\n"); + return -EINVAL; + } + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + if (input[0] == 0) + smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]); + else if (input[0] == 1) + smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]); + else + return -EINVAL; + } + + return 0; +} + static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .backend_init = smu10_hwmgr_backend_init, .backend_fini = smu10_hwmgr_backend_fini, @@ -1399,9 +1478,12 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .powergate_sdma = smu10_powergate_sdma, .set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq, .set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq, + .set_hard_min_gfxclk_by_freq = smu10_set_hard_min_gfxclk_by_freq, + .set_soft_max_gfxclk_by_freq = smu10_set_soft_max_gfxclk_by_freq, .get_power_profile_mode = smu10_get_power_profile_mode, .set_power_profile_mode = smu10_set_power_profile_mode, .asic_reset = smu10_asic_reset, + .set_fine_grain_clk_vol = smu10_set_fine_grain_clk_vol, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index ee0c9591620b..6c9b5f060902 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -284,7 +284,7 @@ struct smu10_hwmgr { uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; uint32_t gfx_min_freq_limit; - uint32_t gfx_max_freq_limit; + uint32_t gfx_max_freq_limit; /* in units of 10 kHz */ bool vcn_power_gated; bool vcn_dpg_mode; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 4a3b64aa21ce..3bf8be4d107b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -1585,9 +1585,19 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->current_profile_setting.sclk_down_hyst = 100; data->current_profile_setting.sclk_activity = SMU7_SCLK_TARGETACTIVITY_DFLT; data->current_profile_setting.bupdate_mclk = 1; - data->current_profile_setting.mclk_up_hyst = 0; - data->current_profile_setting.mclk_down_hyst = 100; - data->current_profile_setting.mclk_activity = SMU7_MCLK_TARGETACTIVITY_DFLT; + if (adev->gmc.vram_width == 256) { + data->current_profile_setting.mclk_up_hyst = 10; + data->current_profile_setting.mclk_down_hyst = 60; + data->current_profile_setting.mclk_activity = 25; + } else if (adev->gmc.vram_width == 128) { + data->current_profile_setting.mclk_up_hyst = 5; +
data->current_profile_setting.mclk_down_hyst = 16; + data->current_profile_setting.mclk_activity = 20; + } else if (adev->gmc.vram_width == 64) { + data->current_profile_setting.mclk_up_hyst = 3; + data->current_profile_setting.mclk_down_hyst = 16; + data->current_profile_setting.mclk_activity = 20; + } hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c index adfbcbe5d113..8a9aee85043e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu9_smumgr.c @@ -61,9 +61,6 @@ static uint32_t smu9_wait_for_response(struct pp_hwmgr *hwmgr) uint32_t reg; uint32_t ret; - /* Due to the L1 policy problem under SRIOV, we have to use - * mmMP1_SMN_C2PMSG_103 as the driver response register - */ if (hwmgr->pp_one_vf) { reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_103); @@ -148,10 +145,6 @@ int smu9_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, smu9_wait_for_response(hwmgr); - /* Due to the L1 policy problem under SRIOV, we have to use - * mmMP1_SMN_C2PMSG_101 as the driver message register and - * mmMP1_SMN_C2PMSG_102 as the driver parameter register. - */ if (hwmgr->pp_one_vf) { WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_103, 0); WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_102, parameter); diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c index 1e222c5d91a4..daf122f24f23 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega10_smumgr.c @@ -209,11 +209,13 @@ static int vega10_smu_init(struct pp_hwmgr *hwmgr) int ret; struct cgs_firmware_info info = {0}; - ret = cgs_get_firmware_info(hwmgr->device, - CGS_UCODE_ID_SMU, - &info); - if (ret || !info.kptr) - return -EINVAL; + if (!amdgpu_sriov_vf((struct amdgpu_device *)hwmgr->adev)) { + ret = cgs_get_firmware_info(hwmgr->device, + CGS_UCODE_ID_SMU, + &info); + if (ret || !info.kptr) + return -EINVAL; + } priv = kzalloc(sizeof(struct vega10_smumgr), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 5c4b74f964fc..e41fd6ea6451 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -481,17 +481,6 @@ static int smu_late_init(void *handle) return ret; } - /* - * Set initialized values (get from vbios) to dpm tables context such as - * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each - * type of clks. 
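The smu7_init_dpm_defaults hunk just above starts keying the MCLK DPM hysteresis defaults off the VRAM bus width. The same tuning can be read as a small lookup table; note that the if/else ladder has no final else, so widths other than 64/128/256 bits keep whatever the fields already held. A sketch of that reading (names and the fallback behavior are ours, not the driver's):

struct sketch_mclk_tuning {
	unsigned int vram_width;  /* bits */
	unsigned int up_hyst, down_hyst, activity;
};

/* the values the hunk above introduces */
static const struct sketch_mclk_tuning sketch_tunings[] = {
	{ 256, 10, 60, 25 },
	{ 128,  5, 16, 20 },
	{  64,  3, 16, 20 },
};

static int sketch_lookup_mclk_tuning(unsigned int vram_width,
				     const struct sketch_mclk_tuning **out)
{
	unsigned int i;

	for (i = 0; i < sizeof(sketch_tunings) / sizeof(sketch_tunings[0]); i++) {
		if (sketch_tunings[i].vram_width == vram_width) {
			*out = &sketch_tunings[i];
			return 0;
		}
	}
	return -1;  /* unknown width: caller keeps the previous defaults */
}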
- */ - ret = smu_set_default_dpm_table(smu); - if (ret) { - dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); - return ret; - } - ret = smu_populate_umd_state_clk(smu); if (ret) { dev_err(adev->dev, "Failed to populate UMD state clocks!\n"); @@ -780,6 +769,19 @@ static void smu_throttling_logging_work_fn(struct work_struct *work) smu_log_thermal_throttling(smu); } +static void smu_interrupt_work_fn(struct work_struct *work) +{ + struct smu_context *smu = container_of(work, struct smu_context, + interrupt_work); + + mutex_lock(&smu->mutex); + + if (smu->ppt_funcs && smu->ppt_funcs->interrupt_work) + smu->ppt_funcs->interrupt_work(smu); + + mutex_unlock(&smu->mutex); +} + static int smu_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -802,6 +804,7 @@ static int smu_sw_init(void *handle) mutex_init(&smu->message_lock); INIT_WORK(&smu->throttling_logging_work, smu_throttling_logging_work_fn); + INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn); atomic64_set(&smu->throttle_int_counter, 0); smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; @@ -832,10 +835,13 @@ static int smu_sw_init(void *handle) smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; - ret = smu_init_microcode(smu); - if (ret) { - dev_err(adev->dev, "Failed to load smu firmware!\n"); - return ret; + + if (!amdgpu_sriov_vf(adev)) { + ret = smu_init_microcode(smu); + if (ret) { + dev_err(adev->dev, "Failed to load smu firmware!\n"); + return ret; + } } ret = smu_smc_table_sw_init(smu); @@ -1013,6 +1019,17 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } + /* + * Set initial values (read from the vbios) in the dpm tables context, + * such as gfxclk, memclk, dcefclk, etc., and enable the DPM feature for + * each type of clock.
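The interrupt_work plumbing added above is the classic top-half/bottom-half split: the hard IRQ handler (see smu_v11_0_irq_process further down) only schedules the work item, and anything that needs smu->mutex or a firmware round-trip runs later in process context, with cancel_work_sync() on teardown. A self-contained sketch of the pattern, using only stock kernel APIs:

#include <linux/interrupt.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct sketch_ctx {
	struct mutex lock;
	struct work_struct irq_work;
};

static void sketch_irq_work_fn(struct work_struct *work)
{
	struct sketch_ctx *ctx = container_of(work, struct sketch_ctx, irq_work);

	mutex_lock(&ctx->lock);  /* sleeping is fine: process context */
	/* ... acknowledge the event, message the firmware, etc. ... */
	mutex_unlock(&ctx->lock);
}

static irqreturn_t sketch_irq_handler(int irq, void *data)
{
	struct sketch_ctx *ctx = data;

	schedule_work(&ctx->irq_work);  /* must not sleep here */
	return IRQ_HANDLED;
}

/*
 * Setup:    mutex_init(&ctx->lock); INIT_WORK(&ctx->irq_work, sketch_irq_work_fn);
 * Teardown: cancel_work_sync(&ctx->irq_work);
 */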
+ */ + ret = smu_set_default_dpm_table(smu); + if (ret) { + dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); + return ret; + } + ret = smu_notify_display_change(smu); if (ret) return ret; @@ -1194,6 +1211,7 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) int ret = 0; cancel_work_sync(&smu->throttling_logging_work); + cancel_work_sync(&smu->interrupt_work); ret = smu_disable_thermal_alert(smu); if (ret) { @@ -1214,7 +1232,6 @@ static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - int ret = 0; if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1230,11 +1247,7 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - ret = smu_smc_hw_cleanup(smu); - if (ret) - return ret; - - return 0; + return smu_smc_hw_cleanup(smu); } int smu_reset(struct smu_context *smu) @@ -1823,18 +1836,12 @@ int smu_set_watermarks_for_clock_ranges(struct smu_context *smu, if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; - mutex_lock(&smu->mutex); + if (smu->disable_watermark) + return 0; - if (!smu->disable_watermark && - smu_feature_is_enabled(smu, SMU_FEATURE_DPM_DCEFCLK_BIT) && - smu_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT)) { - ret = smu_set_watermarks_table(smu, clock_ranges); + mutex_lock(&smu->mutex); - if (!(smu->watermarks_bitmap & WATERMARKS_EXIST)) { - smu->watermarks_bitmap |= WATERMARKS_EXIST; - smu->watermarks_bitmap &= ~WATERMARKS_LOADED; - } - } + ret = smu_set_watermarks_table(smu, clock_ranges); mutex_unlock(&smu->mutex); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index d298fa65274d..fc376281e629 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2388,6 +2388,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .gfx_ulv_control = smu_v11_0_gfx_ulv_control, .deep_sleep_control = smu_v11_0_deep_sleep_control, .get_fan_parameters = arcturus_get_fan_parameters, + .interrupt_work = smu_v11_0_interrupt_work, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 985c70615944..8d8081c6bd38 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -316,6 +316,18 @@ navi10_get_allowed_feature_mask(struct smu_context *smu, if (smu->dc_controlled_by_gpio) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ACDC_BIT); + if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT); + + /* DPM UCLK enablement should be skipped for navi10 A0 secure board */ + if (!(is_asic_secure(smu) && + (adev->asic_type == CHIP_NAVI10) && + (adev->rev_id == 0)) && + (adev->pm.pp_feature & PP_MCLK_DPM_MASK)) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT) + | FEATURE_MASK(FEATURE_MEM_VDDCI_SCALING_BIT) + | FEATURE_MASK(FEATURE_MEM_MVDD_SCALING_BIT); + /* DS SOCCLK enablement should be skipped for navi10 A0 secure board */ if (is_asic_secure(smu) && (adev->asic_type == CHIP_NAVI10) && @@ -2279,13 +2291,14 @@ static int navi10_run_umc_cdr_workaround(struct smu_context *smu) } /* - * The messages below are only supported by 42.53.0 and later - * PMFWs. 
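A decoding note for the hex constants in the rewritten comment and version check just below: the PMFW version packs major.minor.revision one byte each, so Navi10's 42.53.0 is 0x2a3500 and Navi14's 53.29.0 is 0x351D00. A tiny sketch that makes such gates self-documenting (the macro is ours; the driver compares raw constants):

/* Hypothetical helper, not part of the driver. */
#define SKETCH_PMFW_VERSION(maj, min, rev) \
	(((unsigned int)(maj) << 16) | ((unsigned int)(min) << 8) | (unsigned int)(rev))

/* 0x2a = 42, 0x35 = 53 -> 42.53.0 */
_Static_assert(SKETCH_PMFW_VERSION(42, 53, 0) == 0x2a3500, "Navi10 gate");
/* 0x35 = 53, 0x1D = 29 -> 53.29.0 */
_Static_assert(SKETCH_PMFW_VERSION(53, 29, 0) == 0x351D00, "Navi14 gate");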
+ * The messages below are only supported by Navi10 42.53.0 and later + * PMFWs and Navi14 53.29.0 and later PMFWs. * - PPSMC_MSG_SetDriverDummyTableDramAddrHigh * - PPSMC_MSG_SetDriverDummyTableDramAddrLow * - PPSMC_MSG_GetUMCFWWA */ - if (pmfw_version >= 0x2a3500) { + if (((adev->asic_type == CHIP_NAVI10) && (pmfw_version >= 0x2a3500)) || + ((adev->asic_type == CHIP_NAVI14) && (pmfw_version >= 0x351D00))) { ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GET_UMC_FW_WA, 0, @@ -2323,8 +2336,6 @@ static void navi10_fill_i2c_req(SwI2cRequest_t *req, bool write, { int i; - BUG_ON(numbytes > MAX_SW_I2C_COMMANDS); - req->I2CcontrollerPort = 0; req->I2CSpeed = 2; req->SlaveAddress = address; @@ -2362,6 +2373,12 @@ static int navi10_i2c_read_data(struct i2c_adapter *control, struct smu_table_context *smu_table = &adev->smu.smu_table; struct smu_table *table = &smu_table->driver_table; + if (numbytes > MAX_SW_I2C_COMMANDS) { + dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", + numbytes, MAX_SW_I2C_COMMANDS); + return -EINVAL; + } + memset(&req, 0, sizeof(req)); navi10_fill_i2c_req(&req, false, address, numbytes, data); @@ -2398,6 +2415,12 @@ static int navi10_i2c_write_data(struct i2c_adapter *control, SwI2cRequest_t req; struct amdgpu_device *adev = to_amdgpu_device(control); + if (numbytes > MAX_SW_I2C_COMMANDS) { + dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n", + numbytes, MAX_SW_I2C_COMMANDS); + return -EINVAL; + } + memset(&req, 0, sizeof(req)); navi10_fill_i2c_req(&req, true, address, numbytes, data); @@ -2628,43 +2651,12 @@ static int navi10_enable_mgpu_fan_boost(struct smu_context *smu) static int navi10_post_smu_init(struct smu_context *smu) { - struct smu_feature *feature = &smu->smu_feature; struct amdgpu_device *adev = smu->adev; - uint64_t feature_mask = 0; int ret = 0; if (amdgpu_sriov_vf(adev)) return 0; - /* For Naiv1x, enable these features only after DAL initialization */ - if (adev->pm.pp_feature & PP_SOCCLK_DPM_MASK) - feature_mask |= FEATURE_MASK(FEATURE_DPM_SOCCLK_BIT); - - /* DPM UCLK enablement should be skipped for navi10 A0 secure board */ - if (!(is_asic_secure(smu) && - (adev->asic_type == CHIP_NAVI10) && - (adev->rev_id == 0)) && - (adev->pm.pp_feature & PP_MCLK_DPM_MASK)) - feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT) - | FEATURE_MASK(FEATURE_MEM_VDDCI_SCALING_BIT) - | FEATURE_MASK(FEATURE_MEM_MVDD_SCALING_BIT); - - if (!feature_mask) - return 0; - - bitmap_or(feature->allowed, - feature->allowed, - (unsigned long *)(&feature_mask), - SMU_FEATURE_MAX); - - ret = smu_cmn_feature_update_enable_state(smu, - feature_mask, - true); - if (ret) { - dev_err(adev->dev, "Failed to post uclk/socclk dpm enablement!\n"); - return ret; - } - ret = navi10_run_umc_cdr_workaround(smu); if (ret) { dev_err(adev->dev, "Failed to apply umc cdr workaround!\n"); @@ -2773,6 +2765,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .deep_sleep_control = smu_v11_0_deep_sleep_control, .get_fan_parameters = navi10_get_fan_parameters, .post_init = navi10_post_smu_init, + .interrupt_work = smu_v11_0_interrupt_work, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a2cb831ce8aa..c27806fd07e0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2422,8 +2422,6 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index a2cb831ce8aa..c27806fd07e0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2422,8 +2422,6 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write,
 {
 	int i;
 
-	BUG_ON(numbytes > MAX_SW_I2C_COMMANDS);
-
 	req->I2CcontrollerPort = 0;
 	req->I2CSpeed = 2;
 	req->SlaveAddress = address;
@@ -2461,6 +2459,12 @@ static int sienna_cichlid_i2c_read_data(struct i2c_adapter *control,
 	struct smu_table_context *smu_table = &adev->smu.smu_table;
 	struct smu_table *table = &smu_table->driver_table;
 
+	if (numbytes > MAX_SW_I2C_COMMANDS) {
+		dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n",
+			numbytes, MAX_SW_I2C_COMMANDS);
+		return -EINVAL;
+	}
+
 	memset(&req, 0, sizeof(req));
 	sienna_cichlid_fill_i2c_req(&req, false, address, numbytes, data);
 
@@ -2497,6 +2501,12 @@ static int sienna_cichlid_i2c_write_data(struct i2c_adapter *control,
 	SwI2cRequest_t req;
 	struct amdgpu_device *adev = to_amdgpu_device(control);
 
+	if (numbytes > MAX_SW_I2C_COMMANDS) {
+		dev_err(adev->dev, "numbytes requested %d is over max allowed %d\n",
+			numbytes, MAX_SW_I2C_COMMANDS);
+		return -EINVAL;
+	}
+
 	memset(&req, 0, sizeof(req));
 	sienna_cichlid_fill_i2c_req(&req, true, address, numbytes, data);
 
@@ -2784,6 +2794,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.gfx_ulv_control = smu_v11_0_gfx_ulv_control,
 	.deep_sleep_control = smu_v11_0_deep_sleep_control,
 	.get_fan_parameters = sienna_cichlid_get_fan_parameters,
+	.interrupt_work = smu_v11_0_interrupt_work,
 };
 
 void sienna_cichlid_set_ppt_funcs(struct smu_context *smu)
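Both navi10 and sienna_cichlid now route the AC/DC interrupt through the new .interrupt_work callback, whose implementation smu_v11_0.c adds below: the hard IRQ handler only queues work, and the acknowledgment, which may sleep while talking to the PMFW, runs later in process context. A sketch of that deferral pattern under assumed demo_* names; schedule_work(), INIT_WORK() and container_of() are the real APIs:

#include <linux/interrupt.h>
#include <linux/workqueue.h>

struct demo_smu {
	struct work_struct interrupt_work;	/* INIT_WORK()ed at init time */
};

/* Runs in process context via the system workqueue: sleeping while
 * waiting for the SMU to acknowledge is safe here. */
static void demo_interrupt_work(struct work_struct *work)
{
	struct demo_smu *smu = container_of(work, struct demo_smu,
					    interrupt_work);
	/* e.g. send the AC/DC ack message to the PMFW */
	(void)smu;
}

/* Hard IRQ context: never sleep here, only queue the deferred work. */
static irqreturn_t demo_irq_handler(int irq, void *data)
{
	struct demo_smu *smu = data;

	schedule_work(&smu->interrupt_work);
	return IRQ_HANDLED;
}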
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index b53872eb4398..2380759ddf48 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -322,39 +322,42 @@ int smu_v11_0_setup_pptable(struct smu_context *smu)
 	void *table;
 	uint16_t version_major, version_minor;
 
-	hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
-	version_major = le16_to_cpu(hdr->header.header_version_major);
-	version_minor = le16_to_cpu(hdr->header.header_version_minor);
-	if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) ||
-	    adev->asic_type == CHIP_NAVY_FLOUNDER) {
-		dev_info(adev->dev, "use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id);
-		switch (version_minor) {
-		case 0:
-			ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
-			break;
-		case 1:
-			ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
-							 smu->smu_table.boot_values.pp_table_id);
-			break;
-		default:
-			ret = -EINVAL;
-			break;
+	if (!amdgpu_sriov_vf(adev)) {
+		hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data;
+		version_major = le16_to_cpu(hdr->header.header_version_major);
+		version_minor = le16_to_cpu(hdr->header.header_version_minor);
+		if ((version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) ||
+		    adev->asic_type == CHIP_NAVY_FLOUNDER) {
+			dev_info(adev->dev, "use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id);
+			switch (version_minor) {
+			case 0:
+				ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size);
+				break;
+			case 1:
+				ret = smu_v11_0_set_pptable_v2_1(smu, &table, &size,
+								 smu->smu_table.boot_values.pp_table_id);
+				break;
+			default:
+				ret = -EINVAL;
+				break;
+			}
+			if (ret)
+				return ret;
+			goto out;
 		}
-		if (ret)
-			return ret;
+	}
 
-	} else {
-		dev_info(adev->dev, "use vbios provided pptable\n");
-		index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-						    powerplayinfo);
+	dev_info(adev->dev, "use vbios provided pptable\n");
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    powerplayinfo);
 
-		ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
-						     (uint8_t **)&table);
-		if (ret)
-			return ret;
-		size = atom_table_size;
-	}
+	ret = amdgpu_atombios_get_data_table(adev, index, &atom_table_size, &frev, &crev,
+					     (uint8_t **)&table);
+	if (ret)
+		return ret;
+	size = atom_table_size;
 
+out:
 	if (!smu->smu_table.power_play_table)
 		smu->smu_table.power_play_table = table;
 	if (!smu->smu_table.power_play_table_size)
@@ -952,6 +955,12 @@ static int smu_v11_0_process_pending_interrupt(struct smu_context *smu)
 	return ret;
 }
 
+void smu_v11_0_interrupt_work(struct smu_context *smu)
+{
+	if (smu_v11_0_ack_ac_dc_interrupt(smu))
+		dev_err(smu->adev->dev, "Ack AC/DC interrupt Failed!\n");
+}
+
 int smu_v11_0_enable_thermal_alert(struct smu_context *smu)
 {
 	int ret = 0;
@@ -1317,11 +1326,11 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
 			switch (ctxid) {
 			case 0x3:
 				dev_dbg(adev->dev, "Switched to AC mode!\n");
-				smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+				schedule_work(&smu->interrupt_work);
 				break;
 			case 0x4:
 				dev_dbg(adev->dev, "Switched to DC mode!\n");
-				smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
+				schedule_work(&smu->interrupt_work);
 				break;
 			case 0x7:
 				/*
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 55a254be5ac2..66c1026489be 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -222,14 +222,16 @@ static int renoir_get_profiling_clk_mask(struct smu_context *smu,
 			*sclk_mask = 0;
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) {
 		if (mclk_mask)
-			*mclk_mask = 0;
+			/* mclk levels are in reverse order */
+			*mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1;
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) {
 		if(sclk_mask)
 			/* The sclk as gfxclk and has three level about max/min/current */
 			*sclk_mask = 3 - 1;
 
 		if(mclk_mask)
-			*mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1;
+			/* mclk levels are in reverse order */
+			*mclk_mask = 0;
 
 		if(soc_mask)
 			*soc_mask = NUM_SOCCLK_DPM_LEVELS - 1;
@@ -323,7 +325,7 @@ static int renoir_get_dpm_ultimate_freq(struct smu_context *smu,
 	case SMU_UCLK:
 	case SMU_FCLK:
 	case SMU_MCLK:
-		ret = renoir_get_dpm_clk_limited(smu, clk_type, 0, min);
+		ret = renoir_get_dpm_clk_limited(smu, clk_type, NUM_MEMCLK_DPM_LEVELS - 1, min);
 		if (ret)
 			goto failed;
 		break;
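The renoir hunks above encode one non-obvious fact: the MEMCLK DPM table is ordered high-to-low, so entry 0 holds the fastest memory clock and entry NUM_MEMCLK_DPM_LEVELS - 1 the slowest, which is why the min and peak masks swap values. A toy illustration of that index flip, with an assumed DEMO_* constant:

#include <linux/types.h>

#define DEMO_NUM_MEMCLK_DPM_LEVELS 4	/* stand-in for NUM_MEMCLK_DPM_LEVELS */

/* The table runs high-to-low: the minimum clock sits at the last
 * index, the peak clock at index 0. */
static unsigned int demo_mclk_level(bool want_min)
{
	return want_min ? DEMO_NUM_MEMCLK_DPM_LEVELS - 1 : 0;
}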
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index ebad27c91a0d..27b14eff532c 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -188,7 +188,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
 		return NULL;
 
-	/* we definately need to flush */
+	/* we definitely need to flush */
 	vm_id->pd_gpu_addr = ~0ll;
 
 	/* skip over VMID 0, since it is the system VM */
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index 800721153d51..58557c2263a7 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -117,7 +117,7 @@ int uvd_v1_0_resume(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	/* programm the VCPU memory controller bits 0-27 */
+	/* program the VCPU memory controller bits 0-27 */
 	addr = (rdev->uvd.gpu_addr >> 3) + 16;
 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3;
 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
@@ -360,7 +360,7 @@ int uvd_v1_0_start(struct radeon_device *rdev)
 	/* Set the write pointer delay */
 	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
 
-	/* programm the 4GB memory segment for rptr and ring buffer */
+	/* program the 4GB memory segment for rptr and ring buffer */
 	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
 	       (0x7 << 16) | (0x1 << 31));
diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
index 23b18edda20e..6266167886d9 100644
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@@ -109,7 +109,7 @@ int uvd_v2_2_resume(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	/* programm the VCPU memory controller bits 0-27 */
+	/* program the VCPU memory controller bits 0-27 */
 	addr = rdev->uvd.gpu_addr >> 3;
 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
diff --git a/drivers/gpu/drm/radeon/uvd_v4_2.c b/drivers/gpu/drm/radeon/uvd_v4_2.c
index dc54fa4aaea8..f9e97fa63674 100644
--- a/drivers/gpu/drm/radeon/uvd_v4_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v4_2.c
@@ -40,7 +40,7 @@ int uvd_v4_2_resume(struct radeon_device *rdev)
 	uint64_t addr;
 	uint32_t size;
 
-	/* programm the VCPU memory controller bits 0-27 */
+	/* program the VCPU memory controller bits 0-27 */
 	/* skip over the header of the new firmware format */
 	if (rdev->uvd.fw_header_present)