diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 571 |
1 files changed, 324 insertions, 247 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 536287ddd2ec..2957702fca0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -366,7 +366,89 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), - SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST) + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST), + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI) }; static const struct soc15_reg_golden golden_settings_gc_10_1[] = { @@ -3651,14 +3733,8 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq) { - struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; - if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { - amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); - return; - } - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(action) | @@ -3916,33 +3992,18 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -3969,12 +4030,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -4057,7 +4116,6 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4072,27 +4130,27 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) wks = "_wks"; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + "amdgpu/%s_me%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + "amdgpu/%s_ce%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -4107,15 +4165,15 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) goto out; } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + "amdgpu/%s_mec%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); - err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); + err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); @@ -4583,19 +4641,44 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } -static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev) +static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) { uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); uint32_t *ptr; + uint32_t inst; ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); if (ptr == NULL) { - DRM_ERROR("Failed to allocate memory for IP Dump\n"); - adev->gfx.ip_dump = NULL; - adev->gfx.reg_count = 0; + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; } else { - adev->gfx.ip_dump = ptr; - adev->gfx.reg_count = reg_count; + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; } } @@ -4724,18 +4807,16 @@ static int gfx_v10_0_sw_init(void *handle) } } - if (!adev->enable_mes_kiq) { - r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } - - r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); - if (r) - return r; + r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; } + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); + if (r) + return r; + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0); if (r) return r; @@ -4751,7 +4832,7 @@ static int gfx_v10_0_sw_init(void *handle) gfx_v10_0_gpu_early_init(adev); - gfx_v10_0_alloc_dump_mem(adev); + gfx_v10_0_alloc_ip_dump(adev); return 0; } @@ -4789,10 +4870,8 @@ static int gfx_v10_0_sw_fini(void *handle) amdgpu_gfx_mqd_sw_fini(adev, 0); - if (!adev->enable_mes_kiq) { - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); - amdgpu_gfx_kiq_fini(adev, 0); - } + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); gfx_v10_0_pfp_fini(adev); gfx_v10_0_ce_fini(adev); @@ -4805,7 +4884,9 @@ static int gfx_v10_0_sw_fini(void *handle) gfx_v10_0_free_microcode(adev); - kfree(adev->gfx.ip_dump); + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); return 0; } @@ -6994,10 +7075,7 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) return r; } - if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) - r = amdgpu_mes_kiq_hw_init(adev); - else - r = gfx_v10_0_kiq_resume(adev); + r = gfx_v10_0_kiq_resume(adev); if (r) return r; @@ -7246,11 +7324,9 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; gfx_v10_0_disable_gpa_mode(adev); } @@ -8052,15 +8128,24 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid) { - u32 data; + u32 reg, pre_data, data; + reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); /* not for *_SOC15 */ - data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL); + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) + pre_data = RREG32_NO_KIQ(reg); + else + pre_data = RREG32(reg); - data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; + data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); + if (pre_data != data) { + if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); + } else + WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); + } } static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) @@ -8309,45 +8394,17 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -8372,42 +8429,13 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx10 now */ - } + BUG(); /* only DOORBELL method supported on gfx10 now */ } } @@ -8466,10 +8494,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -8489,10 +8513,6 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned int vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -8550,8 +8570,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -8579,10 +8598,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -8733,19 +8749,9 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v10_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -8771,28 +8777,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -9044,49 +9035,34 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, int i; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; - - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); - } else { - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; - switch (me_id) { - case 0: - if (pipe_id == 0) - amdgpu_fence_process(&adev->gfx.gfx_ring[0]); - else - amdgpu_fence_process(&adev->gfx.gfx_ring[1]); - break; - case 1: - case 2: - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - /* Per-queue interrupt is supported for MEC starting from VI. - * The interrupt can only be enabled/disabled per pipe instead - * of per queue. - */ - if ((ring->me == me_id) && - (ring->pipe == pipe_id) && - (ring->queue == queue_id)) - amdgpu_fence_process(ring); - } - break; + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead + * of per queue. + */ + if ((ring->me == me_id) && + (ring->pipe == pipe_id) && + (ring->queue == queue_id)) + amdgpu_fence_process(ring); } + break; } return 0; @@ -9270,30 +9246,131 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) static void gfx_v10_ip_print(void *handle, struct drm_printer *p) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; + uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); - if (!adev->gfx.ip_dump) + if (!adev->gfx.ip_dump_core) return; for (i = 0; i < reg_count; i++) drm_printf(p, "%-50s \t 0x%08x\n", gc_reg_list_10_1[i].reg_name, - adev->gfx.ip_dump[i]); + adev->gfx.ip_dump_core[i]); + + /* print compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", + adev->gfx.mec.num_mec, + adev->gfx.mec.num_pipe_per_mec, + adev->gfx.mec.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); + } + index += reg_count; + } + } + } + + /* print gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", + adev->gfx.me.num_me, + adev->gfx.me.num_pipe_per_me, + adev->gfx.me.num_queue_per_pipe); + + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); + for (reg = 0; reg < reg_count; reg++) { + drm_printf(p, "%-50s \t 0x%08x\n", + gc_gfx_queue_reg_list_10[reg].reg_name, + adev->gfx.ip_dump_gfx_queues[index + reg]); + } + index += reg_count; + } + } + } } static void gfx_v10_ip_dump(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t i; + uint32_t i, j, k, reg, index = 0; uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); - if (!adev->gfx.ip_dump) + if (!adev->gfx.ip_dump_core) return; amdgpu_gfx_off_ctrl(adev, false); for (i = 0; i < reg_count; i++) - adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump compute queue registers for all instances */ + if (!adev->gfx.ip_dump_compute_queues) + return; + + reg_count = ARRAY_SIZE(gc_cp_reg_list_10); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.mec.num_mec; i++) { + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { + /* ME0 is for GFX so start from 1 for CP */ + nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_10[reg])); + } + index += reg_count; + } + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + amdgpu_gfx_off_ctrl(adev, true); + + /* dump gfx queue registers for all instances */ + if (!adev->gfx.ip_dump_gfx_queues) + return; + + index = 0; + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10); + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { + for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { + nv_grbm_select(adev, i, j, k, 0); + + for (reg = 0; reg < reg_count; reg++) { + adev->gfx.ip_dump_gfx_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_gfx_queue_reg_list_10[reg])); + } + index += reg_count; + } + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); amdgpu_gfx_off_ctrl(adev, true); } |