diff options
author | Dave Airlie <airlied@redhat.com> | 2019-05-31 09:33:29 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2019-05-31 10:04:39 +1000 |
commit | 91c1ead6aee22d4595f50ba66070b94a4a8f84a9 (patch) | |
tree | 066ffa1c352b6257dd37cda6a1df53159e133f2e | |
parent | 14ee642c2ab0a3d8a1ded11fade692d8b77172b9 (diff) | |
parent | cf401e2856b27b2deeada498eab864e2a50cf219 (diff) |
Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3:
- Add new thermal sensors for vega asics
- Various RAS fixes
- Add sysfs interface for memory interface utilization
- Use HMM rather than mmu notifier for user pages
- Expose xgmi topology via kfd
- SR-IOV fixes
- Fixes for manual driver reload
- Add unique identifier for vega asics
- Clean up user fence handling with UVD/VCE/VCN blocks
- Convert DC to use core bpc attribute rather than a custom one
- Add GWS support for KFD
- Vega powerplay improvements
- Add CRC support for DCE 12
- SR-IOV support for new security policy
- Various cleanups
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
202 files changed, 6414 insertions, 2476 deletions
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst index a740e491dfcc..86138798128f 100644 --- a/Documentation/gpu/amdgpu.rst +++ b/Documentation/gpu/amdgpu.rst @@ -70,6 +70,26 @@ Interrupt Handling .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c :internal: +AMDGPU XGMI Support +=================== + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c + :doc: AMDGPU XGMI Support + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c + :internal: + +AMDGPU RAS debugfs control interface +==================================== + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :doc: AMDGPU RAS debugfs control interface + + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c + :internal: + + GPU Power/Thermal Controls and Monitoring ========================================= diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 844f0a162981..a04f2fc7bf37 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - select MMU_NOTIFIER + depends on ARCH_HAS_HMM + select HMM_MIRROR help - This option selects CONFIG_MMU_NOTIFIER if it isn't already - selected to enabled full userptr support. + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it + isn't already selected to enabled full userptr support. 
config DRM_AMDGPU_GART_DEBUGFS bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index fdd0ca4b0f0b..57ce44cc3226 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ @@ -173,7 +173,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 14398f55f602..58f8f132904d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,6 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; -extern int amdgpu_lockup_timeout; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -211,6 +210,7 @@ struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; struct amdgpu_atif; +struct kfd_vm_fault_info; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -415,6 +415,7 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct 
amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -558,6 +559,8 @@ struct amdgpu_asic_funcs { uint64_t *count1); /* do we need to reset the asic at init time (e.g., kexec) */ bool (*need_reset_on_init)(struct amdgpu_device *adev); + /* PCIe replay counter */ + uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); }; /* @@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg { u32 ref_and_mask_sdma1; }; +struct amdgpu_mmio_remap { + u32 reg_offset; + resource_size_t bus_addr; +}; + struct amdgpu_nbio_funcs { const struct nbio_hdp_flush_reg *hdp_flush_reg; u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); @@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs { void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); }; struct amdgpu_df_funcs { @@ -680,6 +689,12 @@ struct amdgpu_df_funcs { u32 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); }; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { @@ -764,6 +779,7 @@ struct amdgpu_device { void __iomem *rmmio; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + struct amdgpu_mmio_remap rmmio_remap; /* protects concurrent SMC based register access */ spinlock_t smc_idx_lock; amdgpu_rreg_t smc_rreg; @@ -936,6 +952,13 @@ struct amdgpu_device { struct work_struct xgmi_reset_work; bool in_baco_reset; + + long gfx_timeout; + long sdma_timeout; + long video_timeout; + long compute_timeout; + + uint64_t unique_id; }; static inline struct amdgpu_device 
*amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) +#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index aeead072fa79..4af3989e4a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -25,8 +25,10 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_dma_buf.h" #include <linux/module.h> #include <linux/dma-buf.h> +#include "amdgpu_xgmi.h" static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) }; /* this is going to have a few of the MSBs set that we need to - * clear */ + * clear + */ bitmap_complement(gpu_resources.queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); @@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.queue_bitmap); /* According to linux/bitmap.h we shouldn't use bitmap_clear if - * nbits is not compile time constant */ + * nbits is not compile time constant + */ 
last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; @@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, + void **mem_obj) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = 1; + bp.domain = AMDGPU_GEM_DOMAIN_GWS; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) { + dev_err(adev->dev, + "failed to allocate gws BO for amdkfd (%d)\n", r); + return r; + } + + *mem_obj = bo; + return 0; +} + +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +{ + struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; + + amdgpu_bo_unref(&bo); +} + uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { @@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +{ + struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; + struct amdgpu_device *adev = (struct amdgpu_device *)dst; + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); + + if (ret < 0) { + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. 
ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, ret); + ret = 0; + } + return (uint8_t)ret; +} + +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rmmio_remap.bus_addr; +} + +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gds.gws_size; +} int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e37fa7e85b1..f968bf147c5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; @@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, @@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint8_t 
amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index fa09e11a600c..c6abcf72e822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index fec3a6aa1de6..4e8b4e949926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ef3d93b995b2..d5af41143d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ 
-369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { /* Don't read wptr with get_user because the user @@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), get_queue_mask(adev, 
pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); release_queue(kgd); @@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. 
- * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type) { signed long r; uint32_t seq; @@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) PACKET3_INVALIDATE_TLBS_DST_SEL(1) | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); @@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + uint32_t flush_type = 0; if (adev->in_gpu_reset) return -EIO; + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); + return invalidate_tlbs_with_kiq(adev, pasid, flush_type); for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) @@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - write_vmid_invalidate_request(kgd, vmid); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + flush_type); break; } } @@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } - write_vmid_invalidate_request(kgd, vmid); + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. 
A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); return 0; } @@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a6e5184d436c..87177ed37dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -30,6 +30,7 @@ #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_dma_buf.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to * a HW bug. 
@@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, mutex_unlock(&process_info->lock); } +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *bo_list_entry; + + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); +} + /* Initializes user pages. It registers the MMU notifier and validates * the userptr BO in the GTT domain. * @@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; 
list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; + ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); + if (ret) + goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, &vm->process_info->eviction_fence->base, true); amdgpu_bo_unreserve(vm->root.base.bo); @@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, return 0; +reserve_shared_fail: wait_pd_fail: validate_pd_fail: amdgpu_bo_unreserve(vm->root.base.bo); @@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; - } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | + ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; bo_type = ttm_bo_type_sg; @@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (user_addr) { ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); + if (ret) goto allocate_init_user_pages_failed; - } } if (offset) @@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); 
mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. + /* If the SG is not NULL, it's one we created for a doorbell or mmio + * remap BO. We need to free it. */ if (mem->bo->tbo.sg) { sg_free_table(mem->bo->tbo.sg); @@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); if (ret) { - pr_err("Failed to map radeon bo to gpuvm\n"); + pr_err("Failed to map bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } @@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; + bo->tbo.ttm->pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. 
*/ } - - /* Mark the BO as valid unless it was invalidated - * again concurrently - */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; } return 0; } +/* Remove invalid userptr BOs from hmm track list + * + * Stop HMM track the userptr update + */ +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } +} + /* Validate invalid userptr BOs * * Validates BOs on the userptr_inval_list, and moves them back to the @@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. 
Put this BO back on - * the userptr_valid_list. If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); + /* Stop HMM track the userptr update. We dont check the return + * value for concurrent CPU page table update because we will + * reschedule the restore worker if process_info->evicted_bos + * is updated. + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1897,8 +1876,9 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: return ret; } @@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * hanging. No point trying again. 
*/ } + unlock_out: + untrack_invalid_user_pages(process_info); mutex_unlock(&process_info->lock); mmput(mm); put_task_struct(usertask); @@ -2130,3 +2112,88 @@ ttm_reserve_fail: kfree(pd_bo_list); return ret; } + +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) +{ + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; + int ret; + + if (!info || !gws) + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -EINVAL; + + mutex_init(&(*mem)->lock); + (*mem)->bo = amdgpu_bo_ref(gws_bo); + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; + (*mem)->process_info = process_info; + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + + /* Validate gws bo the first time it is added to process */ + mutex_lock(&(*mem)->process_info->lock); + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + goto bo_reservation_failure; + } + + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); + if (ret) { + pr_err("GWS BO validate failed %d\n", ret); + goto bo_validation_failure; + } + /* GWS resource is shared b/t amdgpu and amdkfd + * Add process eviction fence to bo so they can + * evict each other. 
+ */ + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + amdgpu_bo_unreserve(gws_bo); + mutex_unlock(&(*mem)->process_info->lock); + + return ret; + +bo_validation_failure: + amdgpu_bo_unreserve(gws_bo); +bo_reservation_failure: + mutex_unlock(&(*mem)->process_info->lock); + amdgpu_sync_free(&(*mem)->sync); + remove_kgd_mem_from_kfd_bo_list(*mem, process_info); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&(*mem)->lock); + kfree(*mem); + *mem = NULL; + return ret; +} + +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) +{ + int ret; + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; + struct amdgpu_bo *gws_bo = kgd_mem->bo; + + /* Remove BO from process's validate list so restore worker won't touch + * it anymore + */ + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); + + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + //TODO add BO back to validate_list? 
+ return ret; + } + amdgpu_amdkfd_remove_eviction_fence(gws_bo, + process_info->eviction_fence); + amdgpu_bo_unreserve(gws_bo); + amdgpu_sync_free(&kgd_mem->sync); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&kgd_mem->lock); + kfree(mem); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5c79da8e1150..d497467b7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = adev->gds.gds_gfx_bo; - list->gws_obj = adev->gds.gws_gfx_bo; - list->oa_obj = adev->gds.oa_gfx_bo; + list->gds_obj = NULL; + list->gws_obj = NULL; + list->oa_obj = NULL; array = amdgpu_bo_list_array_entry(list, 0); memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6239b6be6f..d72cc583ebd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; - /* 
Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + /* Get userptr backing pages. 
If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. 
*/ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* UVD & VCE fw doesn't support user fences */ + /* MM engine doesn't support user fences */ ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ( - ring->funcs->type == AMDGPU_RING_TYPE_UVD || - ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); @@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; @@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ + /* No memory allocation is allowed while holding the mn lock. + * p->mn is hold until amdgpu_cs_submit is finished and fence is added + * to BOs. + */ amdgpu_mn_lock(p->mn); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. 
+ */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -EAGAIN; + goto error_abort; } job->owner = p->filp; @@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9f282e971197..0ffa6733f2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +/** + * DOC: pcie_replay_count + * + * The amdgpu driver provides a sysfs API for reporting the total number + * of PCIe replays (NAKs) + * The file pcie_replay_count is used for this and returns the total + * number of replays as a sum of the NAKs generated and NAKs received + */ + +static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); + + return snprintf(buf, PAGE_SIZE, "%llu\n", cnt); +} + +static DEVICE_ATTR(pcie_replay_count, S_IRUGO, + amdgpu_device_get_pcie_replay_count, NULL); + static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** @@ -910,8 +932,10 @@ def_value: * Validates certain module parameters and updates * the associated values used by the driver (all asics). 
*/ -static void amdgpu_device_check_arguments(struct amdgpu_device *adev) +static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int ret = 0; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vram_page_split = 1024; } - if (amdgpu_lockup_timeout == 0) { - dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); - amdgpu_lockup_timeout = 10000; + ret = amdgpu_device_get_job_timeout_settings(adev); + if (ret) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return ret; } adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + + return ret; } /** @@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; + + /* query the reg access mode at the very beginning */ + amdgpu_virt_init_reg_access_mode(adev); } adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + /* Read BIOS */ + if (!amdgpu_get_bios(adev)) + return -EINVAL; + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(adev); if (r) { @@ -2473,7 +2515,9 
@@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->lock_reset); mutex_init(&adev->virt.dpm_mutex); - amdgpu_device_check_arguments(adev); + r = amdgpu_device_check_arguments(adev); + if (r) + return r; spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); @@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto fence_driver_init; } - /* Read BIOS */ - if (!amdgpu_get_bios(adev)) { - r = -EINVAL; - goto failed; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - goto failed; - } - /* detect if we are with an SRIOV vbios */ amdgpu_device_detect_sriov_bios(adev); @@ -2672,6 +2703,10 @@ fence_driver_init: if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); + r = amdgpu_ucode_sysfs_init(adev); + if (r) + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2712,7 +2747,13 @@ fence_driver_init: } /* must succeed. 
*/ - amdgpu_ras_post_init(adev); + amdgpu_ras_resume(adev); + + r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); + if (r) { + dev_err(adev->dev, "Could not create pcie_replay_count"); + return r; + } return 0; @@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); amdgpu_debugfs_regs_cleanup(adev); + device_remove_file(adev->dev, &dev_attr_pcie_replay_count); + amdgpu_ucode_sysfs_fini(adev); } @@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_amdkfd_suspend(adev); + amdgpu_ras_suspend(adev); + r = amdgpu_device_ip_suspend_phase1(adev); /* evict vram memory */ @@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_kms_helper_poll_enable(dev); + amdgpu_ras_resume(adev); + /* * Most of the connector probing functions try to acquire runtime pm * refs to ensure that the GPU is powered on when connector polling is @@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); + r = amdgpu_device_ip_late_init(tmp_adev); + if (r) + goto out; + + /* must succeed. 
*/ + amdgpu_ras_resume(tmp_adev); + /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, tmp_adev); @@ -3695,43 +3749,6 @@ skip_hw_reset: return r; } -static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, - enum pci_bus_speed *speed, - enum pcie_link_width *width) -{ - struct pci_dev *pdev = adev->pdev; - enum pci_bus_speed cur_speed; - enum pcie_link_width cur_width; - u32 ret = 1; - - *speed = PCI_SPEED_UNKNOWN; - *width = PCIE_LNK_WIDTH_UNKNOWN; - - while (pdev) { - cur_speed = pcie_get_speed_cap(pdev); - cur_width = pcie_get_width_cap(pdev); - ret = pcie_bandwidth_available(adev->pdev, NULL, - NULL, &cur_width); - if (!ret) - cur_width = PCIE_LNK_WIDTH_RESRV; - - if (cur_speed != PCI_SPEED_UNKNOWN) { - if (*speed == PCI_SPEED_UNKNOWN) - *speed = cur_speed; - else if (cur_speed < *speed) - *speed = cur_speed; - } - - if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { - if (*width == PCIE_LNK_WIDTH_UNKNOWN) - *width = cur_width; - else if (cur_width < *width) - *width = cur_width; - } - pdev = pci_upstream_bridge(pdev); - } -} - /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, - &platform_link_width); + pcie_bandwidth_available(adev->pdev, NULL, + &platform_speed_cap, &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b083b219b1a9..30e6ad8a90bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) amdgpu_dither_enum_list, sz); if 
(amdgpu_device_has_dc_support(adev)) { - adev->mode_info.max_bpc_property = - drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); - if (!adev->mode_info.max_bpc_property) - return -ENOMEM; adev->mode_info.abm_level_property = drm_property_create_range(adev->ddev, 0, "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a38e0fb4a6fe..4711cf1b5bd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) * Returns: * 0 on success or a negative error code on failure. */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -137,57 +138,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } -/** - * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table - * implementation - * @dev: DRM device - * @attach: DMA-buf attachment - * @sg: Scatter/gather table - * - * Imports shared DMA buffer memory exported by another device. - * - * Returns: - * A new GEM BO of the given DRM device, representing the memory - * described by the given DMA-buf attachment and scatter/gather table. 
- */ -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) -{ - struct reservation_object *resv = attach->dmabuf->resv; - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_bo *bo; - struct amdgpu_bo_param bp; - int ret; - - memset(&bp, 0, sizeof(bp)); - bp.size = attach->dmabuf->size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_CPU; - bp.flags = 0; - bp.type = ttm_bo_type_sg; - bp.resv = resv; - ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, &bp, &bo); - if (ret) - goto error; - - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; - bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; - bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) - bo->prime_shared_count = 1; - - ww_mutex_unlock(&resv->lock); - return &bo->gem_base; - -error: - ww_mutex_unlock(&resv->lock); - return ERR_PTR(ret); -} - static int __reservation_object_make_exclusive(struct reservation_object *obj) { @@ -231,7 +181,7 @@ err_fences_put: } /** - * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * @@ -242,8 +192,8 @@ err_fences_put: * Returns: * 0 on success or a negative error code on failure. 
*/ -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -291,15 +241,15 @@ error_detach: } /** - * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * * This is called when a shared DMA buffer no longer needs to be accessible by * another device. For now, simply unpins the buffer from GTT. */ -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -334,7 +284,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) } /** - * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation * @dma_buf: Shared DMA buffer * @direction: Direction of DMA transfer * @@ -345,8 +295,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) * Returns: * 0 on success or a negative error code on failure. 
*/ -static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, - enum dma_data_direction direction) +static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -374,12 +324,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, } const struct dma_buf_ops amdgpu_dmabuf_ops = { - .attach = amdgpu_gem_map_attach, - .detach = amdgpu_gem_map_detach, + .attach = amdgpu_dma_buf_map_attach, + .detach = amdgpu_dma_buf_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, - .begin_cpu_access = amdgpu_gem_begin_cpu_access, + .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, @@ -418,6 +368,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, } /** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Imports shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM BO of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. 
+ */ +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg) +{ + struct reservation_object *resv = attach->dmabuf->resv; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; + int ret; + + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; + ww_mutex_lock(&resv->lock, NULL); + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; + + ww_mutex_unlock(&resv->lock); + return &bo->gem_base; + +error: + ww_mutex_unlock(&resv->lock); + return ERR_PTR(ret); +} + +/** * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation * @dev: DRM device * @dma_buf: Shared DMA buffer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h new file mode 100644 index 000000000000..c7056cbe8685 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -0,0 +1,46 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_DMA_BUF_H__ +#define __AMDGPU_DMA_BUF_H__ + +#include <drm/drm_gem.h> + +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dca35407879d..521dbd0d9af8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* edge max emergency(shutdown) temp */ + int max_edge_emergency_temp; + /* hotspot low temperature threshold */ + int min_hotspot_temp; + /* hotspot high temperature critical threshold */ + int max_hotspot_crit_temp; + /* hotspot max emergency(shutdown) temp */ + int 
max_hotspot_emergency_temp; + /* memory low temperature threshold */ + int min_mem_temp; + /* memory high temperature critical threshold */ + int max_mem_crit_temp; + /* memory max emergency(shutdown) temp */ + int max_mem_emergency_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1e2cc9d68a05..1f38d6fc1fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,7 +36,7 @@ #include "amdgpu.h" #include "amdgpu_irq.h" -#include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include "amdgpu_amdkfd.h" @@ -81,6 +81,8 @@ #define KMS_DRIVER_MINOR 32 #define KMS_DRIVER_PATCHLEVEL 0 +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 + int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 10000; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); /** - * DOC: lockup_timeout (int) - * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. - * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. - */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); -module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); + * DOC: lockup_timeout (string) + * Set GPU scheduler timeout value in ms. + * + * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or + * multiple values specified. 0 and negative values are invalidated. 
They will be adjusted + * to default timeout. + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. The second one is for Compute. + * And the third and fourth ones are for SDMA and Video. + * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) + * jobs is 10000. And there is no timeout enforced on compute jobs. + */ +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), " + "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); +module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) @@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry, int halt_if_hws_hang; module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + +/** + * DOC: hws_gws_support(bool) + * Whether HWS support gws barriers. Default value: false (not supported) + * This will be replaced with a MEC firmware version check once firmware + * is ready + */ +bool hws_gws_support; +module_param(hws_gws_support, bool, 0444); +MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); #endif /** @@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. 
+ */ + adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000; + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + /* Invalidate 0 and negative values */ + if (timeout <= 0) { + index++; + continue; + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. + */ + if (index == 1) + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + } + + return ret; +} + static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, @@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 4dee2326b29c..3a483f7e89c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { + struct amdgpu_device *adev = ring->adev; long timeout; int r; + if (!adev) + return -EINVAL; + /* Check that num_hw_submission is a power of two 
*/ if ((num_hw_submission & (num_hw_submission - 1)) != 0) return -EINVAL; @@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - /* for non-sriov case, no timeout enforce on compute ring */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - && !amdgpu_sriov_vf(ring->adev)) - timeout = MAX_SCHEDULE_TIMEOUT; - else - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + /* + * For non-sriov case, no timeout enforce + * on compute ring by default. Unless user + * specifies a timeout for compute ring. + * + * For sriov case, always use the timeout + * as gfx ring + */ + if (!amdgpu_sriov_vf(ring->adev)) + timeout = adev->compute_timeout; + else + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index f89f5734d985..dad2186f4ed5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -27,26 +27,11 @@ struct amdgpu_ring; struct amdgpu_bo; -struct amdgpu_gds_asic_info { - uint32_t total_size; - uint32_t gfx_partition_size; - uint32_t cs_partition_size; -}; - struct amdgpu_gds { - struct amdgpu_gds_asic_info mem; - struct amdgpu_gds_asic_info gws; - struct amdgpu_gds_asic_info oa; + uint32_t gds_size; + uint32_t gws_size; + uint32_t oa_size; uint32_t gds_compute_max_wave_id; - - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. - * Such resource is shared between all gfx clients. 
- * TODO: move this operation to user space - * */ - struct amdgpu_bo* gds_gfx_bo; - struct amdgpu_bo* gws_gfx_bo; - struct amdgpu_bo* oa_gfx_bo; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d4fcf5475464..7b840367004c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f1ddfc50bcc7..b8ba6e27c61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, - int 
flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -extern const struct dma_buf_ops amdgpu_dmabuf_ops; /* * GEM objects. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a17fb1af204..7ab1241bd9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_device_should_recover_gpu(ring->adev)) amdgpu_device_gpu_recover(ring->adev, job); + else + drm_sched_suspend_timeout(&ring->sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b17d0545728e..edb675103bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; - gds_info.gds_total_size = adev->gds.mem.total_size; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size; + gds_info.compute_partition_size = adev->gds.gds_size; + gds_info.gds_total_size = adev->gds.gds_size; + 
gds_info.gws_per_compute_partition = adev->gds.gws_size; + gds_info.oa_per_compute_partition = adev->gds.oa_size; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 58ed401c5996..41ccee49a224 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,7 +45,7 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <linux/mmu_notifier.h> +#include <linux/hmm.h> #include <linux/interval_tree.h> #include <drm/drmP.h> #include <drm/drm.h> @@ -58,14 +58,12 @@ * * @adev: amdgpu device pointer * @mm: process address space - * @mn: MMU notifier structure * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion + * @mirror: HMM mirror function support * * Data for each amdgpu device and process address space. 
*/ @@ -73,7 +71,6 @@ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; - struct mmu_notifier mn; enum amdgpu_mn_type type; /* only used on destruction */ @@ -85,8 +82,9 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; struct rb_root_cached objects; - struct mutex read_lock; - atomic_t recursion; + + /* HMM mirror */ + struct hmm_mirror mirror; }; /** @@ -103,7 +101,7 @@ struct amdgpu_mn_node { }; /** - * amdgpu_mn_destroy - destroy the MMU notifier + * amdgpu_mn_destroy - destroy the HMM mirror * * @work: previously sheduled work item * @@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work) } up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + + hmm_mirror_unregister(&amn->mirror); kfree(amn); } /** - * amdgpu_mn_release - callback to notify about mm destruction + * amdgpu_hmm_mirror_release - callback to notify about mm destruction * - * @mn: our notifier - * @mm: the mm this callback is about + * @mirror: the HMM mirror (mm) this callback is about * - * Shedule a work item to lazy destroy our notifier. + * Shedule a work item to lazy destroy HMM mirror. 
*/ -static void amdgpu_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); INIT_WORK(&amn->work, amdgpu_mn_destroy); schedule_work(&amn->work); } - /** * amdgpu_mn_lock - take the write side lock for this notifier * @@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { if (blockable) - mutex_lock(&amn->read_lock); - else if (!mutex_trylock(&amn->read_lock)) + down_read(&amn->lock); + else if (!down_read_trylock(&amn->lock)) return -EAGAIN; - if (atomic_inc_return(&amn->recursion) == 1) - down_read_non_owner(&amn->lock); - mutex_unlock(&amn->read_lock); - return 0; } @@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) */ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&amn->recursion) == 0) - up_read_non_owner(&amn->lock); + up_read(&amn->lock); } /** @@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } /** - * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change * - * @mn: our notifier - * @range: mmu notifier context + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. 
*/ -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; /* TODO we should be able to split locking for interval tree and * amdgpu_mn_invalidate_node */ - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); - amdgpu_mn_invalidate_node(node, range->start, end); + amdgpu_mn_invalidate_node(node, start, end); } + amdgpu_mn_read_unlock(amn); + return 0; } /** - * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * We temporarily evict all BOs between start and end. This * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. 
*/ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); list_for_each_entry(bo, &node->bos, mn_list) { struct kgd_mem *mem = bo->kfd_bo; if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - range->start, - end)) - amdgpu_amdkfd_evict_userptr(mem, range->mm); + start, end)) + amdgpu_amdkfd_evict_userptr(mem, amn->mm); } } + amdgpu_mn_read_unlock(amn); + return 0; } -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. 
*/ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - - amdgpu_mn_read_unlock(amn); -} +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { [AMDGPU_MN_TYPE_GFX] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, + .release = amdgpu_hmm_mirror_release }, [AMDGPU_MN_TYPE_HSA] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, + .release = amdgpu_hmm_mirror_release }, }; -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - /** - * amdgpu_mn_get - create notifier context + * amdgpu_mn_get - create HMM mirror context * * @adev: amdgpu device pointer * @type: type of MMU notifier context * - * Creates a notifier context for current->mm. + * Creates a HMM mirror context for current->mm. 
*/ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type) @@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, amn->mm = mm; init_rwsem(&amn->lock); amn->type = type; - amn->mn.ops = &amdgpu_mn_ops[type]; amn->objects = RB_ROOT_CACHED; - mutex_init(&amn->read_lock); - atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&amn->mn, mm); + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; + r = hmm_mirror_register(&amn->mirror, mm); if (r) goto free_amn; @@ -432,7 +404,7 @@ free_amn: * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an MMU notifier for the given BO at the specified address. + * Registers an HMM mirror for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) } /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates * * @bo: amdgpu buffer object * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of HMM mirror updates from the buffer object. 
*/ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { @@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ + if (range) { + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->pfns = NULL; + INIT_LIST_HEAD(&range->list); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..f5b67c63ed6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,22 +25,24 @@ #define __AMDGPU_MN_H__ /* - * MMU Notifier + * HMM mirror */ struct amdgpu_mn; +struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, AMDGPU_MN_TYPE_HSA, }; -#if defined(CONFIG_MMU_NOTIFIER) +#if defined(CONFIG_HMM_MIRROR) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} @@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, } static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { + DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " + "add 
CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e9e3db778c6..eb9975f4decb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -331,8 +331,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* maximum number of bits per channel for monitor color */ - struct drm_property *max_bpc_property; /* Adaptive Backlight Modulation (power feature) */ struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 34471dbaa872..a73e1903d29b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = { {0, NULL}, }; +static const struct hwmon_temp_label { + enum PP_HWMON_TEMP channel; + const char *label; +} temp_label[] = { + {PP_TEMP_EDGE, "edge"}, + {PP_TEMP_JUNCTION, "junction"}, + {PP_TEMP_MEM, "mem"}, +}; + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) { if (adev->pm.dpm_enabled) { @@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); - if (adev->powerplay.pp_funcs->set_ppfeature_status) { + if (is_support_sw_smu(adev)) { + ret = smu_set_ppfeature_status(&adev->smu, featuremask); + if (ret) + return -EINVAL; + } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) return -EINVAL; @@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if 
(adev->powerplay.pp_funcs->get_ppfeature_status) + if (is_support_sw_smu(adev)) { + return smu_get_ppfeature_status(&adev->smu, buf); + } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); @@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, } /** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage. The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, + (void *)&value, &size); + + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + +/** * DOC: pcie_bw * * The amdgpu driver provides a sysfs API for estimating how much data @@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, count0, count1, pcie_get_mps(adev->pdev)); } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. + * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. 
This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->unique_id) + return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + + return 0; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, + amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, amdgpu_get_ppfeature_status, amdgpu_set_ppfeature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; + int channel = to_sensor_dev_attr(attr)->index; int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ @@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, 
AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_hotspot_temp; + else + temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_mem_temp; + else + temp = adev->pm.dpm.thermal.max_mem_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int channel = to_sensor_dev_attr(attr)->index; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + 
return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only + * + * - temp[1-3]_label: temperature channel label + * - temp2_label and temp3_label are supported on SOC15 dGPUs only + * + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only * * hwmon interfaces for GPU voltage: * @@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, 
S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + 
&sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) return 0; + /* only SOC15 dGPUs support hotspot and mem temperatures */ + if (((adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_VEGA10) && + (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr || + attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) + return 0; + return effective_mode; } @@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* APU does not have its own dedicated memory */ + if (!(adev->flags & AMD_IS_APU)) { + ret = device_create_file(adev->dev, + &dev_attr_mem_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + "mem_busy_percent\n"); + return ret; + } + } /* PCIe Perf counters won't work on APU nodes */ if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2620,6 +2838,12 @@ int 
amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->unique_id) + ret = device_create_file(adev->dev, &dev_attr_unique_id); + if (ret) { + DRM_ERROR("failed to create device file unique_id\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); if (!(adev->flags & AMD_IS_APU)) + device_remove_file(adev->dev, &dev_attr_mem_busy_percent); + if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); + if (adev->unique_id) + device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_ppfeatures); @@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* GPU Load */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); + /* MEM Load */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size)) + seq_printf(m, "MEM Load: %u %%\n", value); + seq_printf(m, "\n"); /* SMC feature mask */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 86cc24b2e0aa..af9835c8395d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp) return ret; } +static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t id, uint32_t value) +{ + cmd->cmd_id = GFX_CMD_ID_PROG_REG; + cmd->cmd.cmd_setup_reg_prog.reg_value = value; + cmd->cmd.cmd_setup_reg_prog.reg_id = id; +} + +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value) +{ + struct psp_gfx_cmd_resp *cmd = 
NULL; + int ret = 0; + + if (reg >= PSP_REG_LAST) + return -EINVAL; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_reg_prog_cmd_buf(cmd, reg, value); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + return ret; +} + static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, uint32_t xgmi_ta_size, uint32_t shared_size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index cde113f07c96..cf49539b0b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -62,6 +62,14 @@ struct psp_ring uint32_t ring_size; }; +/* More registers may will be supported */ +enum psp_reg_prog_id { + PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ + PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ + PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_LAST +}; + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); @@ -95,12 +103,26 @@ struct psp_funcs int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); }; +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct psp_xgmi_topology_info { + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; +}; + struct psp_xgmi_context { uint8_t initialized; uint32_t session_id; struct amdgpu_bo *xgmi_shared_bo; uint64_t xgmi_shared_mc_addr; void *xgmi_shared_buf; + struct psp_xgmi_topology_info top_info; }; struct psp_ras_context { @@ -181,18 +203,6 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 -struct psp_xgmi_node_info { - uint64_t node_id; - uint8_t num_hops; - uint8_t is_sharing_enabled; - enum 
ta_xgmi_assigned_sdma_engine sdma_engine; -}; - -struct psp_xgmi_topology_info { - uint32_t num_nodes; - struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; -}; #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) @@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; - +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22bd21efe6b1..7c8a4aedf07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -90,6 +90,12 @@ struct ras_manager { struct ras_err_data err_data; }; +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + const char *ras_error_string[] = { "none", "parity", @@ -118,7 +124,8 @@ const char *ras_block_string[] = { #define ras_err_str(i) (ras_error_string[ffs(i)]) #define ras_block_str(i) (ras_block_string[i]) -#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) static void amdgpu_ras_self_test(struct amdgpu_device *adev) @@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -/* - * DOC: ras debugfs control interface +/** + * DOC: AMDGPU RAS debugfs control interface * * It accepts struct ras_debug_if who has two members. * @@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? 
"enable":"disable", ras_block_str(head->block), ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; return -EINVAL; } @@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, return -EINVAL; if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { - /* If ras is enabled by vbios, we set up ras object first in - * both case. For enable, that is all what we need do. For - * disable, we need perform a ras TA disable cmd after that. - */ - ret = __amdgpu_ras_feature_enable(adev, head, 1); - if (ret) - return ret; + if (enable) { + /* There is no harm to issue a ras TA cmd regardless of + * the currecnt ras state. + * If current state == target state, it will do nothing + * But sometimes it requests driver to reset and repost + * with error code -EAGAIN. + */ + ret = amdgpu_ras_feature_enable(adev, head, 1); + /* With old ras TA, we might fail to enable ras. + * Log it and just setup the object. + * TODO need remove this WA in the future. + */ + if (ret == -EINVAL) { + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (!ret) + DRM_INFO("RAS INFO: %s setup object\n", + ras_block_str(head->block)); + } + } else { + /* setup the object then issue a ras TA disable cmd.*/ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; - if (!enable) ret = amdgpu_ras_feature_enable(adev, head, 0); + } } else ret = amdgpu_ras_feature_enable(adev, head, enable); @@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* sysfs begin */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count); + +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) +{ + switch (flags) { + case 0: + return "R"; + case 1: + return "P"; + case 2: + default: + return "F"; + }; +} + +/* + * DOC: ras sysfs gpu_vram_bad_pages interface + * + * It allows user to read the bad pages of vram on the gpu through + * 
/sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages + * + * It outputs multiple lines, and each line stands for one gpu page. + * + * The format of one line is below, + * gpu pfn : gpu page size : flags + * + * gpu pfn and gpu page size are printed in hex format. + * flags can be one of below character, + * R: reserved, this gpu page is reserved and not able to use. + * P: pending for reserve, this gpu page is marked as bad, will be reserved + * in next window of page_reserve. + * F: unable to reserve. this gpu page can't be reserved due to some reasons. + * + * examples: + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + */ + +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, + struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t ppos, size_t count) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, badpages_attr); + struct amdgpu_device *adev = con->adev; + const unsigned int element_size = + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); + ssize_t s = 0; + struct ras_badpage *bps = NULL; + unsigned int bps_count = 0; + + memset(buf, 0, count); + + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + return 0; + + for (; start < end && start < bps_count; start++) + s += scnprintf(&buf[s], element_size + 1, + "0x%08x : 0x%08x : %1s\n", + bps[start].bp, + bps[start].size, + amdgpu_ras_badpage_flags_str(bps[start].flags)); + + kfree(bps); + + return s; +} + static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; 
con->features_attr = (struct device_attribute) { @@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) }, .show = amdgpu_ras_sysfs_features_read, }; + + con->badpages_attr = (struct bin_attribute) { + .attr = { + .name = "gpu_vram_bad_pages", + .mode = S_IRUGO, + }, + .size = 0, + .private = NULL, + .read = amdgpu_ras_sysfs_badpages_read, + }; + sysfs_attr_init(attrs[0]); + sysfs_bin_attr_init(bin_attrs[0]); return sysfs_create_group(&adev->dev->kobj, &group); } @@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; sysfs_remove_group(&adev->dev->kobj, &group); @@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* recovery begin */ + +/* return 0 on success. + * caller need free bps. 
+ */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = 0; + int ret = 0; + + if (!con || !con->eh_data || !bps || !count) + return -EINVAL; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data || data->count == 0) { + *bps = NULL; + goto out; + } + + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + if (!*bps) { + ret = -ENOMEM; + goto out; + } + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ + .bp = data->bps[i].bp, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = 0, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = 1; + else if (data->bps[i].bo == NULL) + (*bps)[i].flags = 2; + } + + *count = data->count; +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) } /* recovery end */ +/* return 0 if ras will reset gpu and repost.*/ +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!ras) + return -EINVAL; + + ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET; + return 0; +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -1415,8 +1593,10 @@ recovery_out: return -EINVAL; } -/* do some init work after IP late init as dependence */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +/* do some init work after IP late init as dependence. + * and it runs in resume/gpu reset/booting up cases. 
+ */ +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } } + + if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) { + con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET; + /* setup ras obj state as disabled. + * for init_by_vbios case. + * if we want to enable ras, just enable it in a normal way. + * If we want do disable it, need setup ras obj as enabled, + * then issue another TA disable cmd. + * See feature_enable_on_boot + */ + amdgpu_ras_disable_all_features(adev, 1); + amdgpu_ras_reset_gpu(adev, 0); + } +} + +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); } /* do some fini work before IP fini as dependence */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index eaef5edefc34..c6b34fbd695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -93,6 +93,7 @@ struct amdgpu_ras { struct dentry *ent; /* sysfs */ struct device_attribute features_attr; + struct bin_attribute badpages_attr; /* block array */ struct ras_manager *objs; @@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block); + +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); @@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); static inline int amdgpu_ras_reset_gpu(struct 
amdgpu_device *adev, bool is_baco) { - /* remove me when gpu reset works on vega20 A1. */ -#if 0 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); -#endif return 0; } @@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); -void amdgpu_ras_post_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d7fae2676269..cdddce938bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -114,6 +114,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + bool no_user_fence; unsigned vmhub; unsigned extra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0c52d1f9fe0f..7138dc1dd1f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/iommu.h> +#include <linux/hmm.h> #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -703,143 +704,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. 
*/ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + struct hmm_range *ranges; + int nr_ranges; +#endif }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + +/* Support Userptr pages cross max 16 vmas */ +#define MAX_NR_VMAS (16) + int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long start = gtt->userptr; + unsigned long end = start + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; + struct hmm_range *ranges; + unsigned long nr_pages, i; + uint64_t *pfns, f; + int r = 0; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; - down_read(&mm->mmap_sem); - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + /* user pages 
may cross multiple VMAs */ + gtt->nr_ranges = 0; + do { + unsigned long vm_start; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; + if (gtt->nr_ranges >= MAX_NR_VMAS) { + DRM_ERROR("Too many VMAs in userptr range\n"); + r = -EFAULT; + goto out; } + + vm_start = vma ? vma->vm_end : start; + vma = find_vma(mm, vm_start); + if (unlikely(!vma || vm_start < vma->vm_start)) { + r = -EFAULT; + goto out; + } + vmas[gtt->nr_ranges++] = vma; + } while (end > vma->vm_end); + + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", + start, gtt->nr_ranges, ttm->num_pages); + + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vmas[0]->vm_file)) { + r = -EPERM; + goto out; } - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); + if (unlikely(!ranges)) { + r = -ENOMEM; + goto out; + } - guptask.task = current; - spin_lock(&gtt->guptasklock); - list_add(&guptask.list, &gtt->guptasks); - spin_unlock(&gtt->guptasklock); + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_ranges; + } - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); + for (i = 0; i < gtt->nr_ranges; i++) + amdgpu_hmm_init_range(&ranges[i]); - spin_lock(&gtt->guptasklock); - list_del(&guptask.list); - spin_unlock(&gtt->guptasklock); + f = ranges[0].flags[HMM_PFN_VALID]; + f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+ 0 : ranges[0].flags[HMM_PFN_WRITE]; + memset64(pfns, f, ttm->num_pages); - if (r < 0) - goto release_pages; + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { + ranges[i].vma = vmas[i]; + ranges[i].start = max(start, vmas[i]->vm_start); + ranges[i].end = min(end, vmas[i]->vm_end); + ranges[i].pfns = pfns + nr_pages; + nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; - pinned += r; + r = hmm_vma_fault(&ranges[i], true); + if (unlikely(r)) + break; + } + if (unlikely(r)) { + while (i--) + hmm_vma_range_done(&ranges[i]); - } while (pinned < ttm->num_pages); + goto out_free_pfns; + } up_read(&mm->mmap_sem); + + for (i = 0; i < ttm->num_pages; i++) { + pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); + if (!pages[i]) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, pfns[i]); + goto out_invalid_pfn; + } + } + gtt->ranges = ranges; + return 0; -release_pages: - release_pages(pages, pinned); +out_free_pfns: + kvfree(pfns); +out_free_ranges: + kvfree(ranges); +out: up_read(&mm->mmap_sem); + return r; + +out_invalid_pfn: + for (i = 0; i < gtt->nr_ranges; i++) + hmm_vma_range_done(&ranges[i]); + kvfree(pfns); + kvfree(ranges); + return -ENOMEM; } /** - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated * - * Called by amdgpu_cs_list_validate(). This creates the page list - * that backs user memory and will ultimately be mapped into the device - * address space. 
+ * Returns: true if pages are still valid */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; + bool r = false; + int i; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); + if (!gtt || !gtt->userptr) + return false; - ttm->pages[i] = pages ? pages[i] : NULL; + DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", + gtt->userptr, gtt->nr_ranges, ttm->num_pages); + + WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, + "No user pages to check\n"); + + if (gtt->ranges) { + for (i = 0; i < gtt->nr_ranges; i++) + r |= hmm_vma_range_done(>t->ranges[i]); + kvfree(gtt->ranges[0].pfns); + kvfree(gtt->ranges); + gtt->ranges = NULL; } + + return r; } +#endif /** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * - * Called while unpinning userptr pages + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; + unsigned long i; - if (!page) - continue; - - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i] = pages ? 
pages[i] : NULL; } /** @@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + if (gtt->ranges && + ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0], + gtt->ranges[0].pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(&gtt->guptasklock); - INIT_LIST_HEAD(&gtt->guptasks); - atomic_set(&gtt->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(&gtt->guptasklock); - list_for_each_entry(entry, &gtt->guptasks, list) { - if (entry->task == current) { - spin_unlock(&gtt->guptasklock); - return false; - } - } - spin_unlock(&gtt->guptasklock); - - atomic_inc(&gtt->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(&gtt->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; + return true; } /** @@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize various on-chip memory pools */ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size); + adev->gds.gds_size); if (r) { DRM_ERROR("Failed initializing GDS heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - 4, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size); + adev->gds.gws_size); if (r) { DRM_ERROR("Failed initializing gws heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size); + adev->gds.oa_size); if (r) { DRM_ERROR("Failed initializing oa heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { diff
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..c2b7669004ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + return false; +} +#endif + void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); @@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7b33867036e7..33c1eb76c076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +#define 
FW_VERSION_ATTR(name, mode, field) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct drm_device *ddev = dev_get_drvdata(dev); \ + struct amdgpu_device *adev = ddev->dev_private; \ + \ + return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \ +} \ +static DEVICE_ATTR(name, mode, show_##name, NULL) + +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); +FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); +FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); + +static struct attribute *fw_attrs[] = { + &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, + &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr, + &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr, + &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr, + 
&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr, + &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr, + &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr, + &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, + &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, + &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, + &dev_attr_dmcu_fw_version.attr, NULL +}; + +static const struct attribute_group fw_attr_group = { + .name = "fw_version", + .attrs = fw_attrs +}; + +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) +{ + return sysfs_create_group(&adev->dev->kobj, &fw_attr_group); +} + +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev) +{ + sysfs_remove_group(&adev->dev->kobj, &fw_attr_group); +} + static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, struct amdgpu_firmware_info *ucode, uint64_t mc_addr, void *kptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7ac25a1c7853..ec4c2ea1f05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev); void amdgpu_ucode_free_bo(struct amdgpu_device *adev); +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..118451f5e3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) return 0; } -static int amdgpu_vcn_pause_dpg_mode(struct 
amdgpu_device *adev, - struct dpg_pause_state *new_state) -{ - int ret_code; - uint32_t reg_data = 0; - uint32_t reg_data2 = 0; - struct amdgpu_ring *ring; - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); - - if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* pause DPG non-jpeg */ - reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, 
mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg non-jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.fw_based = new_state->fw_based; - } - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); - - if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* Make sure JPRG Snoop is disabled before sending the pause */ - reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); - reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); - - /* pause DPG jpeg */ - reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); - WREG32_SOC15(UVD, 0, 
mmUVD_JRBC_RB_WPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.jpeg = new_state->jpeg; - } - - return 0; -} - static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); @@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a0ad19af9080..a1ee19251aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,27 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, 
inst, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, @@ -81,6 +102,8 @@ struct amdgpu_vcn { unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7d484fad3909..1f0bd4d16475 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) return clk; } +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->init_reg_access_mode) + virt->ops->init_reg_access_mode(adev); +} + +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) + ret = true; + + return ret; +} + +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) + && !(amdgpu_sriov_runtime(adev))) + ret = true; + + return ret; 
+} + +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) + ret = true; + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 584947b7ccf3..dca25deee75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; +/* According to the fw feature, some new reg access modes are supported */ +#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ +#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ +#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ +#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -59,6 +65,7 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); + void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -258,6 +265,7 @@ struct amdgpu_virt { uint32_t gim_feature; /* protect DPM events to GIM */ struct mutex dpm_mutex; + uint32_t reg_access_mode; }; #define amdgpu_sriov_enabled(adev) \ @@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_skip_setting(struct 
amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a48c84c51775..d11eba09eadd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) return &hive->device_list; } +/** + * DOC: AMDGPU XGMI Support + * + * XGMI is a high speed interconnect that joins multiple GPU cards + * into a homogeneous memory space that is organized by a collective + * hive ID and individual node IDs, both of which are 64-bit numbers. + * + * The file xgmi_device_id contains the unique per GPU device ID and + * is stored in the /sys/class/drm/card${cardno}/device/ directory. + * + * Inside the device directory a sub-directory 'xgmi_hive_info' is + * created which contains the hive ID and the list of nodes. + * + * The hive ID is stored in: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id + * + * The node information is stored in numbered directories: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id + * + * Each device has their own xgmi_hive_info directory with a mirror + * set of node sub-directories. + * + * The XGMI memory space is built by contiguously adding the power of + * two padded VRAM space from each node to each other. 
+ * + */ + + static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, struct device_attribute *attr, char *buf) { @@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, hive->number_devices, - &hive->topology_info); + &adev->psp.xgmi_context.top_info); if (ret) dev_err(adev->dev, "XGMI: Set topology failure on device %llx, hive %llx, ret %d", @@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev return ret; } + +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_hops; + return -EINVAL; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { - struct psp_xgmi_topology_info *hive_topology; + struct psp_xgmi_topology_info *top_info; struct amdgpu_hive_info *hive; struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; @@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - hive_topology = &hive->topology_info; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) - hive_topology->nodes[count++].node_id = entry->node_id; + top_info->nodes[count++].node_id = entry->node_id; + top_info->num_nodes = count; hive->number_devices = count; - /* Each psp need to get the latest topology */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 
1].node_id = adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; + } + + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", tmp_adev->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret); /* To do : continue with some node failed or disable the whole hive */ - break; + goto exit; } } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - break; - } - if (!ret) ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + + mutex_unlock(&hive->hive_lock); +exit: if (!ret) dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); @@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret); - - mutex_unlock(&hive->hive_lock); -exit: return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 3e9c91e9a4bf..fbcee31788c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -27,7 +27,6 @@ struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; - struct psp_xgmi_topology_info topology_info; int number_devices; struct mutex hive_lock, reset_lock; struct kobject *kobj; @@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, 
+ struct amdgpu_device *peer_adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07c1f239e9c3..3a4f20766a39 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, + .get_pcie_replay_count = &cik_get_pcie_replay_count, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index d5ebe566809b..8c09bf994acd 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } +/* hold counter assignment per gpu struct */ +struct df_v3_6_event_mask { + struct amdgpu_device gpu; + uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; +}; + +/* get assigned df perfmon ctr as int */ +static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config, + int *counter) +{ + struct df_v3_6_event_mask *mask; + int i; + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < 
AMDGPU_DF_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { + *counter = i; + return; + } + } +} + +/* get address based on counter assignment */ +static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, + uint64_t config, + int is_ctrl, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + + int target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr < 0) + return; + + switch (target_cntr) { + + case 0: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + break; + case 1: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + break; + case 2: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + break; + case 3: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + break; + + } + +} + +/* get read counter address */ +static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +} + +/* get control counter settings i.e. address and values to set */ +static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr, + uint32_t *lo_val, + uint32_t *hi_val) +{ + + uint32_t eventsel, instance, unitmask; + uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; + + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + + if (lo_val == NULL || hi_val == NULL) + return; + + if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { + DRM_ERROR("DF PMC addressing not retrieved! 
Lo: %x, Hi: %x", + *lo_base_addr, *hi_base_addr); + return; + } + + eventsel = GET_EVENT(config); + instance = GET_INSTANCE(config); + unitmask = GET_UNITMASK(config); + + es_5_0 = eventsel & 0x3FUL; + es_13_6 = instance; + es_13_0 = (es_13_6 << 6) + es_5_0; + es_13_12 = (es_13_0 & 0x03000UL) >> 12; + es_11_8 = (es_13_0 & 0x0F00UL) >> 8; + es_7_0 = es_13_0 & 0x0FFUL; + *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); + *hi_val = (es_11_8 | ((es_13_12)<<(29))); +} + +/* assign df performance counters for read */ +static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, + uint64_t config, + int *is_assigned) +{ + + struct df_v3_6_event_mask *mask; + int i, target_cntr; + + target_cntr = -1; + + *is_assigned = 0; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr >= 0) { + *is_assigned = 1; + return 0; + } + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { + if (mask->config_assign_mask[i] == 0ULL) { + mask->config_assign_mask[i] = config & 0x0FFFFFFUL; + return 0; + } + } + + return -ENOSPC; +} + +/* release performance counter */ +static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + + struct df_v3_6_event_mask *mask; + int target_cntr; + + target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + if (target_cntr >= 0) + mask->config_assign_mask[target_cntr] = 0ULL; + +} + +/* + * get xgmi link counters via programmable data fabric (df) counters (max 4) + * using cake tx event. 
+ * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @count -> counters to pass + * + */ + +static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + uint64_t *count) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + lo_val = RREG32_PCIE(lo_base_addr); + hi_val = RREG32_PCIE(hi_base_addr); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); +} + +/* + * reset xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ +static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr; + uint64_t config; + + config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + WREG32_PCIE(lo_base_addr, 0UL); + WREG32_PCIE(hi_base_addr, 0UL); +} + +/* + * add xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ + +static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + int ret, is_assigned; + + if (instance < 0 || instance > 1) + return -EINVAL; + + config = GET_INSTANCE_CONFIG(instance); + + ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); + + if (ret || is_assigned) + return ret; + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + &lo_val, + &hi_val); + + WREG32_PCIE(lo_base_addr, lo_val); + WREG32_PCIE(hi_base_addr, hi_val); + + return ret; +} + + +/* + * start xgmi link counters + * + * @adev -> amdgpu 
device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either resume or assign event via df perfmon + * + */ + +static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_enable) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + int ret; + + if (instance < 0 || instance > 1) + return -EINVAL; + + if (is_enable) { + + ret = df_v3_6_add_xgmi_link_cntr(adev, instance); + + if (ret) + return ret; + + } else { + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if (lo_base_addr == 0) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + + ret = 0; + } + + return ret; + +} + +/* + * stop xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_disable -> either pause or unassign event via df perfmon + * + */ + +static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_disable) +{ + + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + if (is_disable) { + df_v3_6_reset_xgmi_link_cntr(adev, instance); + df_v3_6_pmc_release_cntr(adev, config); + } else { + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + } + + return 0; +} + +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, + int is_enable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 
1 : -1); + + if (xgmi_tx_link >= 0) + ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, + is_enable); + + if (ret) + return ret; + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, + int is_disable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) { + ret = df_v3_6_stop_xgmi_link_cntr(adev, + xgmi_tx_link, + is_disable); + if (ret) + return ret; + } + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, + uint64_t config, + uint64_t *count) +{ + + int xgmi_tx_link; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) { + df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); + df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); + } + + break; + default: + break; + } + +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .init = df_v3_6_init, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, @@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .update_medium_grain_clock_gating = df_v3_6_update_medium_grain_clock_gating, .get_clockgating_state = df_v3_6_get_clockgating_state, + .pmc_start = df_v3_6_pmc_start, + .pmc_stop = df_v3_6_pmc_stop, + .pmc_get_count = df_v3_6_pmc_get_count }; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index e79c58e5efcb..fcffd807764d 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,6 +35,23 @@ enum DF_V3_6_MGCG { DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define AMDGPU_DF_MAX_COUNTERS 4 + +/* get flags from df perfmon config */ +#define GET_EVENT(x) (x & 
0xFFUL) +#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \ + | ((0x046ULL + x) << 8) \ + | (0x02 << 16)) + +/* df event conf macros */ +#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) +#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) + extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a59e0fdf5a97..4cd1731d62fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle) static int gfx_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - 
adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 02955e6e9dd9..25400b708722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle) static int gfx_v8_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..c763733619fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -34,6 +34,7 @@ #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" +#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) /* GDS reserve memory: 64 bytes alignment */ adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); @@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + + (adev->gds.gds_size + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); @@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle) kfree(ras_if); } - 
amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); @@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) */ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, + WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << @@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { /* csib */ - 
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].sched.ready = false; @@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - 
WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - 
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ @@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 
0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static const u32 vgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x000000f8, 0xbf110800, + 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, + 0x7e080280, 0x7e0a0280, + 0x7e0c0280, 0x7e0e0280, + 0x80808800, 0xbe803200, + 0xbf84fff5, 0xbf9c0000, + 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, + 0x10020288, 0xb8810904, + 0xb7814000, 0xd1196a01, + 0x00000301, 0xbe800087, + 0xbefc00c1, 0xd89c4000, + 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, + 0x00000800, 0x80808100, + 0xbf84fff8, 0x7e020280, + 0xbf810000, 0x00000000, 
+}; + +static const u32 sgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, + 0xbe832c65, 0xbe842c65, + 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, + 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, + { SOC15_REG_ENTRY(GC, 
0, mmCPC_EDC_UCODE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, +}; + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i, j; + unsigned total_size, vgpr_offset, sgpr_offset; + u64 gpu_addr; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + total_size = + ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr_init_regs) 
* 3) + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + vgpr_offset = total_size; + total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); + sgpr_offset = total_size; + total_size += sizeof(sgpr_init_compute_shader); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) + ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; + + for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) + ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* VGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* 
SGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* shedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (j = 0; j < 16; j++) { + gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + 
gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } - if (*ras_if) + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. 
*/ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -3614,7 +3876,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int gfx_v9_0_late_init(void *handle) @@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: - adev->gds.mem.total_size = 0x1000; + adev->gds.gds_size = 0x1000; break; default: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; } @@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - 
adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 7bb5359d0bbd..0dc8926111e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; /* Program the AGP BAR */ - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) @@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * workaround that increase system aperture high address (add 1) * to get rid of the VM fault and hardware hang. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ @@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); } static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) @@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) * VF copy registers so vbios post doesn't program them, for * SRIOV driver need to program them */ - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); } @@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 8a3b5e6fc6c9..8bf2ba310fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -289,7 +289,7 @@ out: * * @adev: amdgpu_device pointer * - * Load the GDDR MC ucode into the hw (CIK). + * Load the GDDR MC ucode into the hw (VI). * Returns 0 on success, error on failure. */ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) @@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). + * physical address space (VI). 
*/ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { @@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). + * vram and gart within the GPU's physical address space (VI). * Returns 0 for success. */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) @@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @vmid: vm instance to flush * - * Flush the TLB for the requested page table (CIK). + * Flush the TLB for the requested page table (VI). */ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t flush_type) @@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) * This sets up the TLBs, programs the page tables for VMID0, * sets up the hw for VMIDs 1-15 which are allocated on * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). + * and GPUVM for FSA64 clients (VI). * Returns 0 for success, errors for failure. */ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) @@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * This disables all VM page table (CIK). + * This disables all VM page table (VI). */ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) { @@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * - * Print human readable fault information (CIK). + * Print human readable fault information (VI). 
*/ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client, unsigned pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b7370d914a5..602593bab7a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle) return 0; } /* handle resume path. */ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -731,7 +754,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } @@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + if (amdgpu_virt_support_skip_setting(adev)) + break; + /* fall through */ case CHIP_VEGA20: soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, @@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + 
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + /* After HDP is initialized, flush HDP.*/ adev->nbio_funcs->hdp_flush(adev, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 41a9a5779623..05d1d448c8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; @@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { + if (amdgpu_virt_support_skip_setting(adev)) + return; + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_virt_support_skip_setting(adev)) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + 
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + } } /** @@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + if (amdgpu_virt_support_skip_setting(adev)) + return; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2471e7cf75ea..31030f86be86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -26,6 +26,7 @@ #include "nbio/nbio_6_1_sh_mask.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" +#include "mp/mp_9_0_offset.h" #include "soc15.h" #include "vega10_ih.h" #include "soc15_common.h" @@ -343,7 +344,7 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) + && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT) amdgpu_device_gpu_recover(adev, NULL); } @@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } +static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); + uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + + adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; + + if (rlc_fw_ver >= 0x5d) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; + + if (sos_fw_ver >= 0x80455) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; + + if (sos_fw_ver >= 0x8045b) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; +} + const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = 
xgpu_ai_release_full_gpu_access, @@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .trans_msg = xgpu_ai_mailbox_trans_msg, .get_pp_clk = xgpu_ai_get_pp_clk, .force_dpm_level = xgpu_ai_force_dpm_level, + .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..73419fa38159 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,9 +29,18 @@ #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, .detect_hw_virt = nbio_v7_0_detect_hw_virt, + .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, }; diff --git 
a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d51598cfe..bfaaa327ae3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,9 +27,18 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 2f79765b4bdb..7f8edc66ddff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -94,6 +94,7 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore 
HW IP FW */ GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ + GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ }; @@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to setup register program */ +struct psp_gfx_cmd_reg_prog { + uint32_t reg_value; + uint32_t reg_id; +}; + /* All GFX ring buffer commands. */ union psp_gfx_commands { @@ -226,6 +233,7 @@ union psp_gfx_commands struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; + struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog; }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 143f0fae69d5..3f5827764df0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp); +static int psp_v3_1_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type); + static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp, psp_v3_1_reroute_ih(psp); - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - 
psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + if (psp_v3_1_support_vmr_ring(psp)) { + ret = psp_v3_1_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + /* No size initialization for sriov */ + /* Write the ring initialization command to C2PMSG_101 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_101), 0x80000000, + 0x8000FFFF, false); + } else { + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there 
might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_64), 0x80000000, + 0x8000FFFF, false); + } return ret; } @@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - psp_ring_reg = 3 << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + if (psp_v3_1_support_vmr_ring(psp)) { + /* Write the Destroy GPCOM ring command to C2PMSG_101 */ + psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } return ret; } @@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_v3_1_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + 
psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) return 0; } +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + return true; + + return false; +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = { .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, + .support_vmr_ring = psp_v3_1_support_vmr_ring, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9c88ce513d78..7a259c5b6c62 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - 
golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle) } /* handle resume path. */ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -1571,7 +1596,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int sdma_v4_0_sw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d8df68893b9..4ff930a47e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the 
number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .need_full_reset = &si_need_full_reset, .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, + .get_pcie_replay_count = &si_get_pcie_replay_count, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7e594c2bfb4..d9fdd95fd6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -44,6 +44,7 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "mp/mp_9_0_offset.h" @@ -64,6 +65,9 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include <uapi/linux/kfd_ioctl.h> #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) @@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, tmp &= ~(entry->and_mask); tmp |= entry->or_mask; } - WREG32(reg, tmp); + + if (reg == SOC15_REG_OFFSET(GC, 0, 
mmPA_SC_BINNER_EVENT_CNTL_3) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) || + reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)) + WREG32_RLC(reg, tmp); + else + WREG32(reg, tmp); + } } @@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev) soc15_asic_get_baco_capability(adev, &baco_reset); else baco_reset = false; + if (baco_reset) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (hive || (ras && ras->supported)) + baco_reset = false; + } break; default: baco_reset = false; @@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + + /* For Vega10 SR-IOV, PSP need to be initialized before IH */ + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); @@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) 
/* Just return false for soc15 GPUs. Reset does not seem to * be necessary. */ - return false; + if (!amdgpu_passthrough(adev)) + return false; if (adev->flags & AMD_IS_APU) return false; @@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static int soc15_common_early_init(void *handle) { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; @@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, 
i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); + /* Two reasons to skip + * 1, Host driver already programmed them + * 2, To avoid registers program violations in SR-IOV + */ + if (!amdgpu_virt_support_skip_setting(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio_funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } } adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, @@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle) soc15_program_aspm(adev); /* setup nbio registers */ adev->nbio_funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio_funcs->remap_hdp_registers) + adev->nbio_funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index a66c8bfbbaa6..06f39f5bbf76 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -42,8 +42,18 @@ struct soc15_reg_golden { u32 or_mask; }; +struct soc15_reg_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t reg_value; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 49c262540940..47f74dab365d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ 
b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,26 +69,60 @@ } \ } while (0) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) +#define WREG32_RLC(reg, value) \ + do { \ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t i = 0; \ + uint32_t retries = 50000; \ + uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ + uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \ + uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \ + WREG32(r0, value); \ + WREG32(r1, (reg | 0x80000000)); \ + WREG32(spare_int, 0x1); \ + for (i = 0; i < retries; i++) { \ + u32 tmp = RREG32(r1); \ + if (!(tmp & 0x80000000)) \ + break; \ + udelay(10); \ + } \ + if (i >= retries) \ + pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \ + } else { \ + WREG32(reg, value); \ + } \ + } while (0) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ + uint32_t 
r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ + uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ + uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \ + if (target_reg == grbm_cntl) \ + WREG32(r2, value); \ + else if (target_reg == grbm_idx) \ + WREG32(r3, value); \ + WREG32(target_reg, value); \ + } else { \ + WREG32(target_reg, value); \ + } \ } while (0) -#endif +#define WREG32_SOC15_RLC(ip, inst, reg, value) \ + do { \ + uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ + WREG32_RLC(target_reg, value); \ + } while (0) + +#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ + WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ + WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c4fb58667fd4..bf3385280d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 52bd8a654734..3210a7bd9a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, 
.support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..c61a314c56cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_enc_ring_get_rptr, .get_wptr = uvd_v6_0_enc_ring_get_wptr, .set_wptr = uvd_v6_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 2191d3d0a219..cdb96d4cb424 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, @@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = 
AMDGPU_MMHUB, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 40363ca6c5f1..ab0cb8325796 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf11112..36902ec16dcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index c0ec27991c22..e267b073f525 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 
3dbc51f9d3b9..bb47f5b24be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); /** * vcn_v1_0_early_init - set function pointers @@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - return r; + adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + + return 0; } /** @@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) return r; } +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* 
Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, 
mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, @@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs 
vcn_v1_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, @@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 8d89ab7f0ae8..5f54acc70fec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } adev->irq.ih.enabled = true; if (adev->irq.ih1.ring_size) { ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } adev->irq.ih1.enabled = true; } @@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, 
IH_RB_CNTL_RING2, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } adev->irq.ih2.enabled = true; } } @@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); @@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if 
(amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); @@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, @@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + 
DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5e5b42a0744a..b8adf3808de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static bool vi_need_reset_on_init(struct amdgpu_device *adev) { u32 clock_cntl, pc; @@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &vi_get_pcie_usage, .need_reset_on_init = &vi_need_reset_on_init, + .get_pcie_replay_count = &vi_get_pcie_replay_count, }; #define CZ_REV_BRISTOL(rev) \ diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 3621efbd5759..e413d4a71fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,7 +21,7 @@ */ static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf82012b, + 0xbf820001, 0xbf820121, 0xb8f4f802, 0x89748674, 0xb8f5f803, 0x8675ff75, 0x00000400, 0xbf850017, @@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8671ff71, 0x0000ffff, 0x8f728374, 0xb972e0c2, 0xbf800002, 0xb9740002, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, 
- 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, + 0xbe801f70, 0xbefa0080, 0xb97a0283, 0xbef60068, 0xbef70069, 0xb8fa1c07, 0x8e7a9c7a, 0x87717a71, @@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015d, + 0xbf820001, 0xbf82015e, 0xb8f8f802, 0x89788678, - 0xb8f1f803, 0x866eff71, - 0x00000400, 0xbf850037, - 0x866eff71, 0x00000800, - 0xbf850003, 0x866eff71, - 0x00000100, 0xbf840008, + 0xb8fbf803, 0x866eff7b, + 0x00000400, 0xbf85003b, + 0x866eff7b, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000100, 0xbf84000c, 0x866eff78, 0x00002000, - 0xbf840001, 0xbf810000, + 0xbf840005, 0xbf8e0010, + 0xb8eef803, 0x866eff6e, + 0x00000400, 0xbf84fffb, 0x8778ff78, 0x00002000, 0x80ec886c, 0x82ed806d, 0xb8eef807, 0x866fff6e, @@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8977ff77, 0xfc000000, 0x87776f77, 0x896eff6e, 0x001f8000, 0xb96ef807, - 0xb8f0f812, 0xb8f1f813, - 0x8ef08870, 0xc0071bb8, + 0xb8faf812, 0xb8fbf813, + 0x8efa887a, 0xc0071bbd, 0x00000000, 0xbf8cc07f, - 0xc0071c38, 0x00000008, + 0xc0071ebd, 0x00000008, 0xbf8cc07f, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, - 0xb8f1f803, 0x8671ff71, + 0xb8fbf803, 0x867bff7b, 0x000001ff, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, @@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, - 0x0000ffff, 0xbef00080, - 0xb9700283, 0xb8f02407, - 0x8e709c70, 0x876d706d, - 0xb8f003c7, 0x8e709b70, - 0x876d706d, 0xb8f0f807, - 0x8670ff70, 0x00007fff, - 0xb970f807, 0xbeee007e, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8fa2407, + 0x8e7a9b7a, 0x876d7a6d, + 0xb8fa03c7, 0x8e7a9a7a, + 0x876d7a6d, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbeee007e, 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x87708478, - 0xb970f802, 0xbf8e0002, - 0xbf88fffe, 0xb8f02a05, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 
0x8e7a8a7a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x87777a77, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x87777a77, 0xbef1007c, + 0xbef00080, 0xb8f02a05, 0x80708170, 0x8e708a70, - 0xb8f11605, 0x80718171, - 0x8e718671, 0x80707170, - 0x80707e70, 0x8271807f, - 0x8671ff71, 0x0000ffff, - 0xc0471cb8, 0x00000040, - 0xbf8cc07f, 0xc04b1d38, - 0x00000048, 0xbf8cc07f, - 0xc0431e78, 0x00000058, - 0xbf8cc07f, 0xc0471eb8, - 0x0000005c, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x8670ff7f, 0x08000000, - 0x8f708370, 0x87777077, - 0x8670ff7f, 0x70000000, - 0x8f708170, 0x87777077, - 0xbefb007c, 0xbefa0080, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f01605, - 0x80708170, 0x8e708670, - 0x807a707a, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc007a, - 0xc0611efa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611b3a, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611e3a, + 0xbefc0070, 
0xc0611bfa, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8f1f803, 0xbefe007c, - 0xbefc007a, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8fbf801, 0xbefe007c, - 0xbefc007a, 0xc0611efa, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0x8670ff7f, 0x04000000, - 0xbeef0080, 0x876f6f70, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f11605, - 0x80718171, 0x8e718471, - 0x8e768271, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747a74, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a717c, - 0xbf85ffe7, 0xbef40172, - 0xbefa0080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0xe0724000, - 0x7a1d0000, 0xe0724100, - 0x7a1d0100, 0xe0724200, - 0x7a1d0200, 0xe0724300, - 0x7a1d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f14306, - 0x8671c171, 0xbf84002c, - 0xbf8a0000, 0x8670ff6f, - 0x04000000, 0xbf840028, - 0x8e718671, 0x8e718271, - 0xbef60071, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8f01605, 0x80708170, - 0x8e708670, 0x807a707a, - 0x807aff7a, 0x00000080, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 
0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x7a1d0002, 0x68040702, - 0xd0c9006a, 0x0000e302, - 0xbf87fff7, 0xbef70000, - 0xbefa00ff, 0x00000400, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, 0xbefe00c1, 0xbeff00c1, - 0xb8f12a05, 0x80718171, - 0x8e718271, 0x8e768871, + 0xbee80080, 0xbee90080, 0xbef600ff, 0x01000000, - 0xbefc0084, 0xbf0a717c, - 0xbf840015, 0xbf11017c, - 0x8071ff71, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x7a1d0000, - 0xe0724100, 0x7a1d0100, - 0xe0724200, 0x7a1d0200, - 0xe0724300, 0x7a1d0300, - 0x807c847c, 0x807aff7a, - 0x00000400, 0xbf0a717c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200dc, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x08000000, 0x8f6e836e, - 0x87776e77, 0x866eff7f, - 0x70000000, 0x8f6e816e, - 0x87776e77, 0x866eff7f, - 0x04000000, 0xbf84001e, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf840019, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82a05, 0x80788178, - 0x8e788a78, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, + 0xb8fb4306, 0x867bc17b, + 0xbf84002c, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840028, 
0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, 0x00000080, 0xbef600ff, 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, - 0xbef80080, 0xbefe00c1, - 0xbeff00c1, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x8e76886f, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x806fff6f, 0x00008000, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0x8e76887b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf840015, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x7e000300, 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xb8f82a05, + 0x7e060303, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200da, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 
0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0x80f8c078, 0xb8ef1605, - 0x806f816f, 0x8e6f846f, - 0x8e76826f, 0xbef600ff, - 0x01000000, 0xbefc006f, - 0xc031003a, 0x00000078, - 0x80f8c078, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe802d00, 0xbe822d02, - 0xbe842d04, 0xbe862d06, - 0xbe882d08, 0xbe8a2d0a, - 0xbe8c2d0c, 0xbe8e2d0e, - 0xbf06807c, 0xbf84fff0, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211c3a, 0x00000078, + 
0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe007a, 0xbeff007b, - 0x866f71ff, 0x000003ff, - 0xb96f4803, 0x866f71ff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0xb8ef1605, - 0x806f816f, 0x8e6f866f, - 0x806e6f6e, 0x806e746e, - 0x826f8075, 0x866fff6f, - 0x0000ffff, 0xc0071cb7, - 0x00000040, 0xc00b1d37, - 0x00000048, 0xc0031e77, - 0x00000058, 0xc0071eb7, - 0x0000005c, 0xbf8cc07f, - 0x866fff6d, 0xf0000000, - 0x8f6f9c6f, 0x8e6f906f, - 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x08000000, - 0x8f6f9b6f, 0x8e6f8f6f, - 0x876e6f6e, 0x866fff70, - 0x00800000, 0x8f6f976f, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8370, - 0xb96ee0c2, 0xbf800002, - 0xb9700002, 0xbf8a0000, - 0x95806f6c, 0xbf810000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x866fff6d, + 0xf8000000, 0x8f6f9b6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x04000000, 0x8f6f9a6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff7a, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0x95806f6c, + 0xbf810000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index abe1a5da29fb..a47f5b933120 
100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? end - //check whether there is mem_viol - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_NO_PC_REWIND - - //if so, need rewind PC assuming GDS operation gets NACKed - s_mov_b32 s_save_tmp, 0 //clear mem_viol bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 - s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc - -L_NO_PC_REWIND: s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 0bb9c577b3a2..6bae2e022c6e 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME +var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME var s_save_spi_init_lo = exec_lo 
var s_save_spi_init_hi = exec_hi @@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 -var s_save_tmp = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_tmp = ttmp14 +var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine var s_save_xnack_mask_lo = ttmp6 var s_save_xnack_mask_hi = ttmp7 var s_save_buf_rsrc0 = ttmp8 @@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9 var s_save_buf_rsrc2 = ttmp10 var s_save_buf_rsrc3 = ttmp11 var s_save_status = ttmp12 -var s_save_mem_offset = ttmp14 +var s_save_mem_offset = ttmp4 var s_save_alloc_size = s_save_trapsts //conflict -var s_save_m0 = ttmp15 +var s_save_m0 = ttmp5 var s_save_ttmps_lo = s_save_tmp //no conflict var s_save_ttmps_hi = s_save_trapsts //no conflict @@ -207,10 +207,10 @@ var s_restore_mode = ttmp7 var s_restore_pc_lo = ttmp0 var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = ttmp14 -var s_restore_exec_hi = ttmp15 -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 +var s_restore_exec_lo = ttmp4 +var s_restore_exec_hi = ttmp5 +var s_restore_status = ttmp14 +var s_restore_trapsts = ttmp15 var s_restore_xnack_mask_lo = xnack_mask_lo var s_restore_xnack_mask_hi = xnack_mask_hi var s_restore_buf_rsrc0 = ttmp8 @@ -266,10 +266,16 @@ if (!EMU_RUN_HACK) L_HALT_WAVE: // If STATUS.HALT is set then this fault must come from SQC instruction fetch. - // We cannot prevent further faults so just terminate the wavefront. + // We cannot prevent further faults. Spin wait until context saved. 
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK s_cbranch_scc0 L_NOT_ALREADY_HALTED - s_endpgm + +L_WAIT_CTX_SAVE: + s_sleep 0x10 + s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc0 L_WAIT_CTX_SAVE + L_NOT_ALREADY_HALTED: s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK @@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP: // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS - s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) - s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) - s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 - s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) - s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set @@ -405,7 +411,7 @@ end else end - // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_save_ttmps_lo) get_sgpr_size_bytes(s_save_ttmps_hi) @@ -413,13 +419,11 @@ end s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF - s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 - 
ack_sqc_store_workaround() - s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1 ack_sqc_store_workaround() - s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1 ack_sqc_store_workaround() - s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1 ack_sqc_store_workaround() /* setup Resource Contants */ @@ -1093,7 +1097,7 @@ end //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_restore_ttmps_lo) get_sgpr_size_bytes(s_restore_ttmps_hi) @@ -1101,10 +1105,9 @@ end s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF - s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 - s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 - s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 - s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 
glc:1 s_waitcnt lgkmcnt(0) //reuse s_restore_m0 as a temp register diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..ea82828fdc76 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_COMPUTE; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) q_properties->type = KFD_QUEUE_TYPE_SDMA; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) + q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; else return -ENOTSUPP; @@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep, struct kfd_process_device *pdd; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; mutex_lock(&p->mutex); @@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (args->size != kfd_doorbell_process_slice(dev)) return -EINVAL; offset = kfd_get_process_doorbells(dev, p); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) + return -EINVAL; + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) + return -ENOMEM; } mutex_lock(&p->mutex); @@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); args->mmap_offset = offset; + /* MMIO is mapped through kfd device + * Generate a kfd mmap offset + */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id); + args->mmap_offset <<= PAGE_SHIFT; + } + return 0; err_free: @@ -1551,6 +1567,32 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, + struct kfd_process *p, void *data) +{ + int retval; + struct kfd_ioctl_alloc_queue_gws_args *args = data; + struct kfd_dev *dev; + + if 
(!hws_gws_support) + return -EINVAL; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + return -EINVAL; + } + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return -EINVAL; + + mutex_lock(&p->mutex); + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); + mutex_unlock(&p->mutex); + + args->first_gws = 0; + return retval; +} + static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, + kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1845,6 +1889,39 @@ err_i1: return retcode; } +static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) +{ + phys_addr_t address; + int ret; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | + VM_DONTDUMP | VM_PFNMAP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + pr_debug("Process %d mapping mmio page\n" + " target user address == 0x%08llX\n" + " physical address == 0x%08llX\n" + " vm_flags == 0x%04lX\n" + " size == 0x%04lX\n", + process->pasid, (unsigned long long) vma->vm_start, + address, vma->vm_flags, PAGE_SIZE); + + ret = io_remap_pfn_range(vma, + vma->vm_start, + address >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + return ret; +} + + static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; @@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) if (!dev) return -ENODEV; return kfd_reserved_mem_mmap(dev, process, vma); + case KFD_MMAP_TYPE_MMIO: + if 
(!dev) + return -ENODEV; + return kfd_mmio_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2e7c44955f43..59f8ca4297db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info #define polaris12_cache_info carrizo_cache_info +#define vegam_cache_info carrizo_cache_info /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info @@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) props->weight = 20; else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) - props->weight = 15; + props->weight = 15 * iolink->num_hops_xgmi; else props->weight = node_distance(id_from, id_to); @@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris12_cache_info; num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); break; + case CHIP_VEGAM: + pcache_info = vegam_cache_info; + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); + break; case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: @@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, struct kfd_dev *kdev, + struct kfd_dev *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) @@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; + sub_type_hdr->num_hops_xgmi = + 
amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); return 0; } @@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, (char *)sub_type_hdr + sizeof(struct crat_subtype_iolink)); ret = kfd_fill_gpu_xgmi_link_to_gpu( - &avail_size, kdev, + &avail_size, kdev, peer_dev->gpu, (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain, nid); if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 7c3f192fe25f..d54ceebd346b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -274,7 +274,8 @@ struct crat_subtype_iolink { uint32_t minimum_bandwidth_mbs; uint32_t maximum_bandwidth_mbs; uint32_t recommended_transfer_size; - uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH]; + uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1]; + uint8_t num_hops_xgmi; }; /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 765b58a17dc7..9d1b026e29e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; #endif @@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, 
.num_sdma_queues_per_engine = 2, }; @@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info vegam_device_info = { + .asic_family = CHIP_VEGAM, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = 
false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, }; @@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = { { 0x6995, &polaris12_device_info }, /* Polaris12 */ { 0x6997, &polaris12_device_info }, /* Polaris12 */ { 0x699F, &polaris12_device_info }, /* Polaris12 */ + { 0x694C, &vegam_device_info }, /* VegaM */ + { 0x694E, &vegam_device_info }, /* VegaM */ + { 0x694F, &vegam_device_info }, /* VegaM */ { 0x6860, &vega10_device_info }, /* Vega10 */ { 0x6861, &vega10_device_info }, /* Vega10 */ { 0x6862, &vega10_device_info }, /* Vega10 */ @@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } else kfd->max_proc_per_quantum = hws_max_conc_proc; + /* Allocate global GWS that is shared by all KFD processes */ + if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) { + dev_err(kfd_device, "Could not allocate %d gws\n", + amdgpu_amdkfd_get_num_gws(kfd->kgd)); + goto out; + } /* calculate max size of mqds needed for queues */ 
size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); - goto out; + goto alloc_gtt_mem_failure; } dev_info(kfd_device, "Allocated %d bytes on gart\n", size); @@ -611,6 +653,9 @@ kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +alloc_gtt_mem_failure: + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); } kfree(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ae381450601c..ece35c7a77b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id); + struct queue *q); static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - if (type == KFD_QUEUE_TYPE_SDMA) + if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } @@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) return dqm->dev->device_info->num_sdma_engines; } +static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines; +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines * dqm->dev->device_info->num_sdma_queues_per_engine; } +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) * preserve the user mode ABI. */ q->doorbell_id = q->properties.queue_id; - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { /* For SDMA queues on SOC15 with 8-byte doorbell, use static * doorbell assignments based on the engine and queue id. 
* The doobell index distance between RLC (2*i) and (2*i+1) @@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, struct kfd_dev *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev->device_info->asic_family) || - q->properties.type == KFD_QUEUE_TYPE_SDMA) + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) retval = create_sdma_queue_nocpsch(dqm, q, qpd); else retval = -EINVAL; @@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's @@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; retval = allocate_hqd(dqm, q); if (retval) @@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd_mgr; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 
dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); @@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = -ENODEV; goto out_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_unlock; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { if (WARN(q->process->mm != current->mm, "should only run in user thread")) retval = -EFAULT; @@ -571,27 +584,6 @@ out_unlock: return retval; } -static struct mqd_manager *get_mqd_manager( - struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) -{ - struct mqd_manager *mqd_mgr; - - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; - - pr_debug("mqd type %d\n", type); - - mqd_mgr = 
dqm->mqd_mgrs[type]; - if (!mqd_mgr) { - mqd_mgr = mqd_manager_init(type, dqm->dev); - if (!mqd_mgr) - pr_err("mqd manager is NULL"); - dqm->mqd_mgrs[type] = mqd_mgr; - } - - return mqd_mgr; -} - static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_active) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot evict queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = true; q->properties.is_active = false; retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, @@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot restore queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = false; q->properties.is_active = true; retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, @@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); dqm->processes_count++; - kfd_inc_compute_active(dqm->dev); dqm_unlock(dqm); + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. 
+ */ + kfd_inc_compute_active(dqm->dev); + return retval; } @@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); goto out; } } @@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm, retval = 1; out: dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (!retval) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; return 0; } @@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm) } static int allocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int *sdma_queue_id) + struct queue *q) { int bit; - if (dqm->sdma_bitmap == 0) - return -ENOMEM; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + q->properties.sdma_engine_id = q->sdma_id % + get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_sdma_engines(dqm); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (dqm->xgmi_sdma_bitmap == 0) + return -ENOMEM; + bit = 
__ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + /* sdma_engine_id is sdma id including + * both PCIe-optimized SDMAs and XGMI- + * optimized SDMAs. The calculation below + * assumes the first N engines are always + * PCIe-optimized ones + */ + q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + + q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_xgmi_sdma_engines(dqm); + } - bit = ffs(dqm->sdma_bitmap) - 1; - dqm->sdma_bitmap &= ~(1 << bit); - *sdma_queue_id = bit; + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); return 0; } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id) + struct queue *q) { - if (sdma_queue_id >= get_num_sdma_queues(dqm)) - return; - dqm->sdma_bitmap |= (1 << sdma_queue_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->sdma_id >= get_num_sdma_queues(dqm)) + return; + dqm->sdma_bitmap |= (1ULL << q->sdma_id); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return; + dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + } } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; - retval = allocate_sdma_queue(dqm, &q->sdma_id); + retval = allocate_sdma_queue(dqm, q); if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - pr_debug("SDMA id is: %d\n", 
q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); @@ -987,7 +1002,7 @@ out_uninit_mqd: out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); return retval; } @@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + retval = allocate_sdma_queue(dqm, q); if (retval) - goto out_unlock; - q->properties.sdma_queue_id = - q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = - q->sdma_id % get_num_sdma_engines(dqm); + goto out; } retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - 
get_mqd_type_from_queue_type(q->properties.type)); - - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_deallocate_doorbell; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. 
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); +out: return retval; } @@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, return 0; } -static int unmap_sdma_queues(struct device_queue_manager *dqm, - unsigned int sdma_engine) +static int unmap_sdma_queues(struct device_queue_manager *dqm) { - return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, - sdma_engine); + int i, retval = 0; + + for (i = 0; i < dqm->dev->device_info->num_sdma_engines + + dqm->dev->device_info->num_xgmi_sdma_engines; i++) { + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); + if (retval) + return retval; + } + return retval; } /* dqm->lock mutex has to be locked before calling this function */ @@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - pr_debug("Before destroying queues, sdma queue count is : %u\n", - dqm->sdma_queue_count); + pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", + dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); - if (dqm->sdma_queue_count > 0) { - unmap_sdma_queues(dqm, 0); - unmap_sdma_queues(dqm, 1); - } + if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) + unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); @@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd_mgr = 
dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto failed; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; deallocate_doorbell(qpd, q); if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } list_del(&q->list); @@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; -failed: failed_try_destroy_debugged_queue: dqm_unlock(dqm); @@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct queue *q, *next; struct device_process_node *cur, *next_dpn; int retval = 0; + bool found = false; dqm_lock(dqm); @@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); + found = true; break; } } dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. 
+ */ + if (found) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) { - r = -ENOMEM; - goto dqm_unlock; - } + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; if (!mqd_mgr->get_wave_state) { r = -EINVAL; @@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + bool found = false; retval = 0; @@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } if (q->properties.is_active) @@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; - kfd_dec_compute_active(dqm->dev); + found = true; break; } } @@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
+ */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; list_del(&q->list); qpd->queue_count--; mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); } -out: - dqm_unlock(dqm); + return retval; +} + +static int init_mqd_managers(struct device_queue_manager *dqm) +{ + int i, j; + struct mqd_manager *mqd_mgr; + + for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { + mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); + if (!mqd_mgr) { + pr_err("mqd manager [%d] initialization failed\n", i); + goto out_free; + } + dqm->mqd_mgrs[i] = mqd_mgr; + } + + return 0; + +out_free: + for (j = 0; j < i; j++) { + kfree(dqm->mqd_mgrs[j]); + dqm->mqd_mgrs[j] = NULL; + } + + return -ENOMEM; +} + +/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ +static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) +{ + int retval; + struct kfd_dev *dev = dqm->dev; + struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; + uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * + dev->device_info->num_sdma_engines * + dev->device_info->num_sdma_queues_per_engine + + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), + (void *)&(mem_obj->cpu_ptr), true); + return retval; } @@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.stop = stop_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.uninitialize = uninitialize; @@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev 
*dev) dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = initialize_nocpsch; @@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; @@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) goto out_free; } + if (init_mqd_managers(dqm)) + goto out_free; + + if (allocate_hiq_sdma_mqd(dqm)) { + pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); + goto out_free; + } + if (!dqm->ops.initialize(dqm)) return dqm; @@ -1772,9 +1843,17 @@ out_free: return NULL; } +void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd) +{ + WARN(!mqd, "No hiq sdma mqd trunk to free"); + + amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +} + void device_queue_manager_uninit(struct device_queue_manager *dqm) { dqm->ops.uninitialize(dqm); + deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); kfree(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 70e38a2e23b9..88b4c007696e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -48,8 +48,6 @@ struct device_process_node { * * @update_queue: Queue update routine. * - * @get_mqd_manager: Returns the mqd manager according to the mqd type. - * * @exeute_queues: Dispatches the queues list to the H/W. * * @register_process: This routine associates a specific process with device. 
@@ -97,10 +95,6 @@ struct device_queue_manager_ops { int (*update_queue)(struct device_queue_manager *dqm, struct queue *q); - struct mqd_manager * (*get_mqd_manager) - (struct device_queue_manager *dqm, - enum KFD_MQD_TYPE type); - int (*register_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); @@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops { void (*init_sdma_vm)(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); + struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); }; /** @@ -185,10 +181,12 @@ struct device_queue_manager { unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; + unsigned int xgmi_sdma_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; - unsigned int sdma_bitmap; + uint64_t sdma_bitmap; + uint64_t xgmi_sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; @@ -201,6 +199,7 @@ struct device_queue_manager { /* hw exception */ bool is_hws_hang; struct work_struct hw_exception_work; + struct kfd_mem_obj hiq_sdma_mqd; }; void device_queue_manager_init_cik( @@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index aed4c21417bf..0d26506798cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -48,6 +48,7 @@ void 
device_queue_manager_init_cik( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_cik; } void device_queue_manager_init_cik_hawaii( @@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik_hawaii; asic_ops->init_sdma_vm = init_sdma_vm_hawaii; + asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 417515332c35..e9fe39382371 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -37,6 +37,7 @@ void device_queue_manager_init_v9( { asic_ops->update_qpd = update_qpd_v9; asic_ops->init_sdma_vm = init_sdma_vm_v9; + asic_ops->mqd_manager_init = mqd_manager_init_v9; } static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index c3a5dcfe877a..3a7cb2f88366 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -54,6 +54,7 @@ void device_queue_manager_init_vi( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; asic_ops->update_qpd = update_qpd_vi; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_vi; } void device_queue_manager_init_vi_tonga( @@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; asic_ops->update_qpd = update_qpd_vi_tonga; asic_ops->init_sdma_vm = init_sdma_vm_tonga; + 
asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6e1d41c5bf86..d674d4b3340f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, return; /* Presumably process exited. */ memset(&memory_exception_data, 0, sizeof(memory_exception_data)); memory_exception_data.gpu_id = dev->id; - memory_exception_data.failure.imprecise = 1; + memory_exception_data.failure.imprecise = true; /* Set failure reason */ if (info) { memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 213ea5454d11..22a8e88b6a67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); break; case CHIP_VEGA10: @@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process) return 0; } - - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index f1596881f20a..1cc03b3ddbb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->nop_packet = nop.u32all; switch (type) { case KFD_QUEUE_TYPE_DIQ: + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; + break; case KFD_QUEUE_TYPE_HIQ: - kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_HIQ); + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; break; default: pr_err("Invalid 
queue type %d\n", type); @@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kernel_queue_init_vi(&kq->ops_asic_specific); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 33830b1a5a54..07f02f8e4fe4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.gws_control_queue = q->gws ? 
1 : 0; packet->bitfields2.queue_type = queue_type__mes_map_queues__normal_compute_vi; @@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index bf20c6d32ef3..2adaf40027eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; @@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: 
packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index aed9b9b82213..9307811bc427 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -23,34 +23,54 @@ #include "kfd_mqd_manager.h" #include "amdgpu_amdkfd.h" +#include "kfd_device_queue_manager.h" -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev) +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev) { - switch (dev->device_info->asic_family) { - case CHIP_KAVERI: - return mqd_manager_init_cik(type, dev); - case CHIP_HAWAII: - return mqd_manager_init_cik_hawaii(type, dev); - case CHIP_CARRIZO: - return mqd_manager_init_vi(type, dev); - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - return mqd_manager_init_vi_tonga(type, dev); - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - return mqd_manager_init_v9(type, dev); - default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - } + struct kfd_mem_obj *mqd_mem_obj = NULL; + + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem; + mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr; + mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr; + + return mqd_mem_obj; +} + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj = NULL; + uint64_t offset; - return NULL; + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + offset = (q->sdma_engine_id * + dev->device_info->num_sdma_queues_per_engine + + q->sdma_queue_id) * + dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; + + offset += 
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem + + offset); + mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset; + mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t) + dev->dqm->hiq_sdma_mqd.cpu_ptr + offset); + + return mqd_mem_obj; +} + +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + WARN_ON(!mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); } void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index f8261313ae7b..56af256a191b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -99,8 +99,16 @@ struct mqd_manager { struct mutex mqd_mutex; struct kfd_dev *dev; + uint32_t mqd_size; }; +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev); + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q); +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj); + void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, uint32_t *se_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ae90a99909ef..6e8509ec29d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + + static int 
init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, uint64_t addr; struct cik_mqd *m; int retval; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct cik_sdma_rlc_registers *m; + struct kfd_dev *dev = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct cik_sdma_rlc_registers), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; @@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, @@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case 
KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 9dbba609450e..4750338199b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } -static int init_mqd(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) { int retval; - uint64_t addr; - struct v9_mqd *m; - struct kfd_dev *kfd = mm->dev; + struct kfd_mem_obj *mqd_mem_obj = NULL; + + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); /* From V9, for CWSR, the control stack is located on the next page * boundary after the mqd, we will use the gtt allocation function * instead of sub-allocation function. 
*/ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); - if (!*mqd_mem_obj) - return -ENOMEM; + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!mqd_mem_obj) + return NULL; retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, ALIGN(q->ctl_stack_size, PAGE_SIZE) + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), - &((*mqd_mem_obj)->gtt_mem), - &((*mqd_mem_obj)->gpu_addr), - (void *)&((*mqd_mem_obj)->cpu_ptr), true); - } else - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), - mqd_mem_obj); - if (retval != 0) + &(mqd_mem_obj->gtt_mem), + &(mqd_mem_obj->gpu_addr), + (void *)&(mqd_mem_obj->cpu_ptr), true); + } else { + retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), + &mqd_mem_obj); + } + + if (retval) { + kfree(mqd_mem_obj); + return NULL; + } + + return mqd_mem_obj; + +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct v9_mqd *m; + struct kfd_dev *kfd = mm->dev; + + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct v9_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct v9_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct 
queue_properties *p, struct mm_struct *mms) @@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct v9_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 6469b3456f00..b550dea9b10a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (q->type == 
KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, int retval; uint64_t addr; struct vi_mqd *m; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), - mqd_mem_obj); - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct vi_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct vi_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, memset(m, 0, sizeof(struct vi_sdma_mqd)); *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = (*mqd_mem_obj)->gpu_addr; retval = mm->update_mqd(mm, m, q); @@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) @@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) 
mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct vi_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 045a229436a0..808194663a7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; - compute_queue_count = queue_count - pm->dqm->sdma_queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count - + pm->dqm->xgmi_sdma_queue_count; /* check if there is over subscription * Note: the arbitration between the number of VMIDs and @@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: 
pm->pmf = &kfd_vi_pm_funcs; break; case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index f2bcf5c092ea..49ab66b703fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -176,8 +176,7 @@ struct pm4_mes_map_process { union { struct { - uint32_t num_gws:6; - uint32_t reserved7:1; + uint32_t num_gws:7; uint32_t sdma_enable:1; uint32_t num_oac:4; uint32_t reserved8:4; @@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_enum { engine_sel__mes_map_queues__compute_vi = 0, engine_sel__mes_map_queues__sdma0_vi = 2, @@ -277,9 +271,11 @@ struct pm4_mes_map_queues { struct { uint32_t reserved1:4; enum mes_map_queues_queue_sel_enum queue_sel:2; - uint32_t reserved2:15; + uint32_t reserved5:6; + uint32_t gws_control_queue:1; + uint32_t reserved2:8; enum mes_map_queues_queue_type_enum queue_type:3; - enum mes_map_queues_alloc_format_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 7c8d9b357749..5466cfe1c3cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_vi_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_vi_enum { engine_sel__mes_map_queues__compute_vi = 0, 
engine_sel__mes_map_queues__sdma0_vi = 2, @@ -240,7 +235,7 @@ struct pm4_mes_map_queues { enum mes_map_queues_queue_sel_vi_enum queue_sel:2; uint32_t reserved2:15; enum mes_map_queues_queue_type_vi_enum queue_type:3; - enum mes_map_queues_alloc_format_vi_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_vi_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 487d5da337c1..b61dc53f42d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -59,6 +59,7 @@ #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ @@ -160,6 +161,11 @@ extern int noretry; */ extern int halt_if_hws_hang; +/* + * Whether MEC FW support GWS barriers + */ +extern bool hws_gws_support; + enum cache_policy { cache_policy_coherent, cache_policy_noncoherent @@ -188,6 +194,7 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; unsigned int num_sdma_engines; + unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; @@ -258,7 +265,7 @@ struct kfd_dev { bool interrupts_active; /* Debug manager */ - struct kfd_dbgmgr *dbgmgr; + struct kfd_dbgmgr *dbgmgr; /* Firmware versions */ uint16_t mec_fw_version; @@ -282,6 +289,9 @@ struct kfd_dev { /* Compute Profile ref. 
count */ atomic_t compute_profile; + + /* Global GWS resource shared b/t processes*/ + void *gws; }; enum kfd_mempool { @@ -329,7 +339,8 @@ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, - KFD_QUEUE_TYPE_DIQ + KFD_QUEUE_TYPE_DIQ, + KFD_QUEUE_TYPE_SDMA_XGMI }; enum kfd_queue_format { @@ -444,6 +455,9 @@ struct queue_properties { * * @device: The kfd device that created this queue. * + * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL + * otherwise. + * * This structure represents user mode compute queues. * It contains all the necessary data to handle such queues. * @@ -465,6 +479,7 @@ struct queue { struct kfd_process *process; struct kfd_dev *device; + void *gws; }; /* @@ -475,6 +490,7 @@ enum KFD_MQD_TYPE { KFD_MQD_TYPE_HIQ, /* for hiq */ KFD_MQD_TYPE_CP, /* for cp queues and diq */ KFD_MQD_TYPE_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_DIQ, /* for diq */ KFD_MQD_TYPE_MAX }; @@ -819,8 +835,6 @@ void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, @@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_get_wave_state(struct process_queue_manager *pqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index fcaaf93681ac..c2c570e6e54f 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -26,6 +26,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_priv.h" #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h" static inline struct process_queue_node *get_queue_by_qid( struct process_queue_manager *pqm, unsigned int qid) @@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) pdd->already_dequeued = true; } +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws) +{ + struct kfd_dev *dev = NULL; + struct process_queue_node *pqn; + struct kfd_process_device *pdd; + struct kgd_mem *mem = NULL; + int ret; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_err("Queue id does not match any known queue\n"); + return -EINVAL; + } + + if (pqn->q) + dev = pqn->q->device; + if (WARN_ON(!dev)) + return -ENODEV; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -EINVAL; + } + + /* Only allow one queue per process can have GWS assigned */ + if (gws && pdd->qpd.num_gws) + return -EINVAL; + + if (!gws && pdd->qpd.num_gws == 0) + return -EINVAL; + + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, &mem); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + + pqn->q->gws = mem; + pdd->qpd.num_gws = gws ? 
amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + + return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); +} + void kfd_process_dequeue_from_all_devices(struct kfd_process *p) { struct kfd_process_device *pdd; @@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { - pr_err("Over-subscription is not allowed for SDMA.\n"); + case KFD_QUEUE_TYPE_SDMA_XGMI: + if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count + >= get_num_sdma_queues(dev->dqm)) || + (type == KFD_QUEUE_TYPE_SDMA_XGMI && + dev->dqm->xgmi_sdma_queue_count + >= get_num_xgmi_sdma_queues(dev->dqm))) { + pr_debug("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; } @@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + + if (pqn->q->gws) { + amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, + pqn->q->gws); + pdd->qpd.num_gws = 0; + } + kfree(pqn->q->properties.cu_mask); pqn->q->properties.cu_mask = NULL; uninit_queue(pqn->q); @@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q = pqn->q; switch (q->properties.type) { case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: seq_printf(m, " SDMA queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_SDMA; @@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q->properties.type, q->device->id); continue; } - mqd_mgr = q->device->dqm->ops.get_mqd_manager( - q->device->dqm, mqd_type); + mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; } else if (pqn->kq) { q = pqn->kq->queue; mqd_mgr = pqn->kq->mqd_mgr; @@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", pqn->kq->dev->id); - mqd_type = KFD_MQD_TYPE_HIQ; break; default: seq_printf(m, 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 769dbc7be8cb..d241a8672599 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.lds_size_in_kb); sysfs_show_32bit_prop(buffer, "gds_size_in_kb", dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "num_gws", + dev->node_props.num_gws); sysfs_show_32bit_prop(buffer, "wave_front_size", dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, "array_count", @@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.drm_render_minor); sysfs_show_64bit_prop(buffer, "hive_id", dev->node_props.hive_id); + sysfs_show_32bit_prop(buffer, "num_sdma_engines", + dev->node_props.num_sdma_engines); + sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines", + dev->node_props.num_sdma_xgmi_engines); if (dev->gpu) { log_max_watch_addr = @@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; - buf[1] = gpu->pdev->subsystem_vendor; - buf[2] = gpu->pdev->subsystem_device; + buf[1] = gpu->pdev->subsystem_vendor | + (gpu->pdev->subsystem_device << 16); + buf[2] = pci_domain_nr(gpu->pdev->bus); buf[3] = gpu->pdev->device; buf[4] = gpu->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); @@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; + dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_xgmi_engines = + gpu->device_info->num_xgmi_sdma_engines; + dev->node_props.num_gws = (hws_gws_support && + dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? 
+ amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); @@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: pr_debug("Adding doorbell packet type capability\n"); dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 84710cfd23c2..276354aa0fcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -65,6 +65,7 @@ struct kfd_node_properties { uint32_t max_waves_per_simd; uint32_t lds_size_in_kb; uint32_t gds_size_in_kb; + uint32_t num_gws; uint32_t wave_front_size; uint32_t array_count; uint32_t simd_arrays_per_engine; @@ -78,6 +79,8 @@ struct kfd_node_properties { uint32_t max_engine_clk_fcompute; uint32_t max_engine_clk_ccompute; int32_t drm_render_minor; + uint32_t num_sdma_engines; + uint32_t num_sdma_xgmi_engines; uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 0c25baded852..5c826faae240 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,6 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) - select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and @@ -17,11 +16,6 @@ config DRM_AMD_DC_DCN1_0 help RV family support for display engine -config DRM_AMD_DC_DCN1_01 - def_bool n - help - RV2 family for display engine - config DEBUG_KERNEL_DC bool "Enable kgdb break in DC" depends on DRM_AMD_DC diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 995f9df66142..53b76e0de940 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -29,6 +29,7 @@ #include "dm_services_types.h" #include "dc.h" #include "dc/inc/core_types.h" +#include "dal_asic_id.h" #include "vid.h" #include "amdgpu.h" @@ -615,6 +616,10 @@ error: static void amdgpu_dm_fini(struct amdgpu_device *adev) { amdgpu_dm_destroy_drm_device(&adev->dm); + + /* DC Destroy TODO: Replace destroy DAL */ + if (adev->dm.dc) + dc_destroy(&adev->dm.dc); /* * TODO: pageflip, vlank interrupt * @@ -629,9 +634,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) mod_freesync_destroy(adev->dm.freesync_module); adev->dm.freesync_module = NULL; } - /* DC Destroy TODO: Replace destroy DAL */ - if (adev->dm.dc) - dc_destroy(&adev->dm.dc); mutex_destroy(&adev->dm.dc_lock); @@ -640,7 +642,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) static int load_dmcu_fw(struct amdgpu_device *adev) { - const char *fw_name_dmcu; + const char *fw_name_dmcu = NULL; int r; const struct dmcu_firmware_header_v1_0 *hdr; @@ -663,7 +665,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case CHIP_VEGA20: return 0; case CHIP_RAVEN: - fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) + if (ASICREV_IS_PICASSO(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; + else if (ASICREV_IS_RAVEN2(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; + else +#endif + return 0; break; default: DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); @@ -2584,7 +2593,7 @@ 
fill_plane_buffer_attributes(struct amdgpu_device *adev, address->type = PLN_ADDR_TYPE_GRAPHICS; address->grph.addr.low_part = lower_32_bits(afb->address); address->grph.addr.high_part = upper_32_bits(afb->address); - } else { + } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { uint64_t chroma_addr = afb->address + fb->offsets[1]; plane_size->video.luma_size.x = 0; @@ -2959,16 +2968,16 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode, } static enum dc_color_depth -convert_color_depth_from_display_info(const struct drm_connector *connector) +convert_color_depth_from_display_info(const struct drm_connector *connector, + const struct drm_connector_state *state) { - struct dm_connector_state *dm_conn_state = - to_dm_connector_state(connector->state); uint32_t bpc = connector->display_info.bpc; - /* TODO: Remove this when there's support for max_bpc in drm */ - if (dm_conn_state && bpc > dm_conn_state->max_bpc) - /* Round down to nearest even number. */ - bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1); + if (state) { + bpc = state->max_bpc; + /* Round down to the nearest even number. 
*/ + bpc = bpc - (bpc & 1); + } switch (bpc) { case 0: @@ -3086,11 +3095,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_ } -static void -fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, - const struct drm_display_mode *mode_in, - const struct drm_connector *connector, - const struct dc_stream_state *old_stream) +static void fill_stream_properties_from_drm_display_mode( + struct dc_stream_state *stream, + const struct drm_display_mode *mode_in, + const struct drm_connector *connector, + const struct drm_connector_state *connector_state, + const struct dc_stream_state *old_stream) { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; @@ -3113,7 +3123,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE; timing_out->display_color_depth = convert_color_depth_from_display_info( - connector); + connector, connector_state); timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; @@ -3310,6 +3320,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, { struct drm_display_mode *preferred_mode = NULL; struct drm_connector *drm_connector; + const struct drm_connector_state *con_state = + dm_state ? 
&dm_state->base : NULL; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -3382,10 +3394,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ if (!scale || mode_refresh != preferred_refresh) fill_stream_properties_from_drm_display_mode(stream, - &mode, &aconnector->base, NULL); + &mode, &aconnector->base, con_state, NULL); else fill_stream_properties_from_drm_display_mode(stream, - &mode, &aconnector->base, old_stream); + &mode, &aconnector->base, con_state, old_stream); update_stream_scaling_settings(&mode, dm_state, stream); @@ -3610,9 +3622,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { dm_new_state->underscan_enable = val; ret = 0; - } else if (property == adev->mode_info.max_bpc_property) { - dm_new_state->max_bpc = val; - ret = 0; } else if (property == adev->mode_info.abm_level_property) { dm_new_state->abm_level = val; ret = 0; @@ -3658,9 +3667,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, } else if (property == adev->mode_info.underscan_property) { *val = dm_state->underscan_enable; ret = 0; - } else if (property == adev->mode_info.max_bpc_property) { - *val = dm_state->max_bpc; - ret = 0; } else if (property == adev->mode_info.abm_level_property) { *val = dm_state->abm_level; ret = 0; @@ -3717,7 +3723,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->underscan_enable = false; state->underscan_hborder = 0; state->underscan_vborder = 0; - state->max_bpc = 8; __drm_atomic_helper_connector_reset(connector, &state->base); } @@ -3743,7 +3748,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) new_state->underscan_enable = state->underscan_enable; new_state->underscan_hborder = state->underscan_hborder; new_state->underscan_vborder = state->underscan_vborder; - new_state->max_bpc = 
state->max_bpc; return &new_state->base; } @@ -4585,6 +4589,15 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, amdgpu_dm_connector->num_modes = drm_add_edid_modes(connector, edid); + /* sorting the probed modes before calling function + * amdgpu_dm_get_native_mode() since EDID can have + * more than one preferred mode. The modes that are + * later in the probed mode list could be of higher + * and preferred resolution. For example, 3840x2160 + * resolution in base EDID preferred timing and 4096x2160 + * preferred resolution in DID extension block later. + */ + drm_mode_sort(&connector->probed_modes); amdgpu_dm_get_native_mode(connector); } else { amdgpu_dm_connector->num_modes = 0; @@ -4664,9 +4677,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, drm_object_attach_property(&aconnector->base.base, adev->mode_info.underscan_vborder_property, 0); - drm_object_attach_property(&aconnector->base.base, - adev->mode_info.max_bpc_property, - 0); + + drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16); + + /* This defaults to the max in the range, but we want 8bpc. 
*/ + aconnector->base.state->max_bpc = 8; + aconnector->base.state->max_requested_bpc = 8; if (connector_type == DRM_MODE_CONNECTOR_eDP && dc_is_dmcu_initialized(adev->dm.dc)) { @@ -4945,12 +4961,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, int x, y; int xorigin = 0, yorigin = 0; - if (!crtc || !plane->state->fb) { - position->enable = false; - position->x = 0; - position->y = 0; + position->enable = false; + position->x = 0; + position->y = 0; + + if (!crtc || !plane->state->fb) return 0; - } if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { @@ -4964,6 +4980,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, x = plane->state->crtc_x; y = plane->state->crtc_y; + if (x <= -amdgpu_crtc->max_cursor_width || + y <= -amdgpu_crtc->max_cursor_height) + return 0; + if (crtc->primary->state) { /* avivo cursor are offset into the total surface */ x += crtc->primary->state->src_x >> 16; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 978ff14a7d45..b0ce44422e90 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -304,7 +304,6 @@ struct dm_connector_state { enum amdgpu_rmx_type scaling; uint8_t underscan_vborder; uint8_t underscan_hborder; - uint8_t max_bpc; bool underscan_enable; bool freesync_capable; uint8_t abm_level; diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index fd5266a58297..12bc7ee66b18 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1313,6 +1313,8 @@ static enum bp_result bios_parser_get_encoder_cap_info( ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0; info->HDMI_6GB_EN = (record->encodercaps & ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 
1 : 0; + info->DP_IS_USB_C = (record->encodercaps & + ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0; return BP_RESULT_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 8196f3bb10c7..53deba42007a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2( return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: - *h = dal_cmd_tbl_helper_dce112_get_table2(); - return true; -#endif - -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: *h = dal_cmd_tbl_helper_dce112_get_table2(); return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 18c775a950cc..4e17af2b63dc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -169,9 +169,14 @@ static bool create_links( link = link_create(&link_init_params); if (link) { - dc->links[dc->link_count] = link; - link->dc = dc; - ++dc->link_count; + if (dc->config.edp_not_connected && + link->connector_signal == SIGNAL_TYPE_EDP) { + link_destroy(&link); + } else { + dc->links[dc->link_count] = link; + link->dc = dc; + ++dc->link_count; + } } } @@ -1136,10 +1141,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* Program all planes within new context*/ for (i = 0; i < context->stream_count; i++) { const struct dc_link *link = context->streams[i]->link; - struct dc_stream_status *status; - - if (context->streams[i]->apply_seamless_boot_optimization) - context->streams[i]->apply_seamless_boot_optimization = false; if (!context->streams[i]->mode_changed) continue; @@ -1164,9 +1165,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } } - status = dc_stream_get_status_from_state(context, 
context->streams[i]); - context->streams[i]->out.otg_offset = status->primary_otg_inst; - CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}", context->streams[i]->timing.h_addressable, context->streams[i]->timing.v_addressable, @@ -1331,71 +1329,94 @@ static bool is_surface_in_context( static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u) { union surface_update_flags *update_flags = &u->surface->update_flags; + enum surface_update_type update_type = UPDATE_TYPE_FAST; if (!u->plane_info) return UPDATE_TYPE_FAST; - if (u->plane_info->color_space != u->surface->color_space) + if (u->plane_info->color_space != u->surface->color_space) { update_flags->bits.color_space_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) + if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) { update_flags->bits.horizontal_mirror_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->rotation != u->surface->rotation) + if (u->plane_info->rotation != u->surface->rotation) { update_flags->bits.rotation_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->format != u->surface->format) + if (u->plane_info->format != u->surface->format) { update_flags->bits.pixel_format_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->stereo_format != u->surface->stereo_format) + if (u->plane_info->stereo_format != u->surface->stereo_format) { update_flags->bits.stereo_format_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } - if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) + if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) { update_flags->bits.per_pixel_alpha_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } - if (u->plane_info->global_alpha_value != 
u->surface->global_alpha_value) + if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) { update_flags->bits.global_alpha_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } + + if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) { + update_flags->bits.sdr_white_level = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (u->plane_info->dcc.enable != u->surface->dcc.enable || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks - || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) + || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) { update_flags->bits.dcc_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (resource_pixel_format_to_bpp(u->plane_info->format) != - resource_pixel_format_to_bpp(u->surface->format)) + resource_pixel_format_to_bpp(u->surface->format)) { /* different bytes per element will require full bandwidth * and DML calculation */ update_flags->bits.bpp_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch || u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch - || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) + || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) { update_flags->bits.plane_size_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + } if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info, sizeof(union dc_tiling_info)) != 0) { update_flags->bits.swizzle_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_MED); + /* todo: below are HW dependent, we should add a hook to * DCE/N resource and validated there. 
*/ - if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) + if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) { /* swizzled mode requires RQ to be setup properly, * thus need to run DML to calculate RQ settings */ update_flags->bits.bandwidth_change = 1; + elevate_update_type(&update_type, UPDATE_TYPE_FULL); + } } - if (update_flags->bits.rotation_change - || update_flags->bits.stereo_format_change - || update_flags->bits.pixel_format_change - || update_flags->bits.bpp_change - || update_flags->bits.bandwidth_change - || update_flags->bits.output_tf_change) - return UPDATE_TYPE_FULL; - - return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST; + /* This should be UPDATE_TYPE_FAST if nothing has changed. */ + return update_type; } static enum surface_update_type get_scaling_info_update_type( @@ -1475,6 +1496,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc, type = get_scaling_info_update_type(u); elevate_update_type(&overall_type, type); + if (u->flip_addr) + update_flags->bits.addr_update = 1; + if (u->in_transfer_func) update_flags->bits.in_transfer_func_change = 1; @@ -1792,10 +1816,15 @@ static void commit_planes_for_stream(struct dc *dc, if (dc->optimize_seamless_boot && surface_count > 0) { /* Optimize seamless boot flag keeps clocks and watermarks high until * first flip. After first flip, optimization is required to lower - * bandwidth. + * bandwidth. Important to note that it is expected UEFI will + * only light up a single display on POST, therefore we only expect + * one stream with seamless boot flag set. 
*/ - dc->optimize_seamless_boot = false; - dc->optimized_required = true; + if (stream->apply_seamless_boot_optimization) { + stream->apply_seamless_boot_optimization = false; + dc->optimize_seamless_boot = false; + dc->optimized_required = true; + } } if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 83d121510ef5..ca50ede37183 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -45,8 +45,10 @@ enum dc_color_space_type { COLOR_SPACE_RGB_LIMITED_TYPE, COLOR_SPACE_YCBCR601_TYPE, COLOR_SPACE_YCBCR709_TYPE, + COLOR_SPACE_YCBCR2020_TYPE, COLOR_SPACE_YCBCR601_LIMITED_TYPE, - COLOR_SPACE_YCBCR709_LIMITED_TYPE + COLOR_SPACE_YCBCR709_LIMITED_TYPE, + COLOR_SPACE_YCBCR709_BLACK_TYPE, }; static const struct tg_color black_color_format[] = { @@ -80,7 +82,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { { COLOR_SPACE_YCBCR709_TYPE, { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA, 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, - /* TODO: correct values below */ { COLOR_SPACE_YCBCR601_LIMITED_TYPE, { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, @@ -88,6 +89,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { { COLOR_SPACE_YCBCR709_LIMITED_TYPE, { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, + { COLOR_SPACE_YCBCR2020_TYPE, + { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2, + 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} }, + { COLOR_SPACE_YCBCR709_BLACK_TYPE, + { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, + 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} }, }; static bool is_rgb_type( @@ -149,6 +156,16 @@ static bool is_ycbcr709_type( return ret; } +static bool is_ycbcr2020_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == 
COLOR_SPACE_2020_YCBCR) + ret = true; + return ret; +} + static bool is_ycbcr709_limited_type( enum dc_color_space color_space) { @@ -174,7 +191,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space) type = COLOR_SPACE_YCBCR601_LIMITED_TYPE; else if (is_ycbcr709_limited_type(color_space)) type = COLOR_SPACE_YCBCR709_LIMITED_TYPE; - + else if (is_ycbcr2020_type(color_space)) + type = COLOR_SPACE_YCBCR2020_TYPE; + else if (color_space == COLOR_SPACE_YCBCR709) + type = COLOR_SPACE_YCBCR709_BLACK_TYPE; + else if (color_space == COLOR_SPACE_YCBCR709_BLACK) + type = COLOR_SPACE_YCBCR709_BLACK_TYPE; return type; } @@ -206,6 +228,7 @@ void color_space_to_black_color( switch (colorspace) { case COLOR_SPACE_YCBCR601: case COLOR_SPACE_YCBCR709: + case COLOR_SPACE_YCBCR709_BLACK: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: case COLOR_SPACE_2020_YCBCR: diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index b37ecc3ede61..e7236539f867 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -704,6 +704,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) if (new_connection_type != dc_connection_none) { link->type = new_connection_type; + link->link_state_valid = false; /* From Disconnected-to-Connected. 
*/ switch (link->connector_signal) { @@ -906,10 +907,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; /* Connectivity log: detection */ - for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) { + for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) { CONN_DATA_DETECT(link, - &sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE], - EDID_BLOCK_SIZE, + &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE], + DC_EDID_BLOCK_SIZE, "%s: [Block %d] ", sink->edid_caps.display_name, i); } @@ -2631,6 +2632,8 @@ void core_link_enable_stream( stream->phy_pix_clk, pipe_ctx->stream_res.audio != NULL); + pipe_ctx->stream->link->link_state_valid = true; + if (dc_is_dvi_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute( pipe_ctx->stream_res.stream_enc, @@ -2713,17 +2716,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; core_dc->hwss.blank_stream(pipe_ctx); if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - dal_ddc_service_write_scdc_data( - stream->link->ddc, 0, - stream->timing.flags.LTE_340MCSC_SCRAMBLE); + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { + struct ext_hdmi_settings settings = {0}; + enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id; + unsigned short masked_chip_caps = link->chip_caps & + EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK; + //Need to inform that sink is going to use legacy HDMI mode. + dal_ddc_service_write_scdc_data( + link->ddc, + 165000,//vbios only handles 165Mhz. 
+ false); + if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) { + /* DP159, Retimer settings */ + if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings)) + write_i2c_retimer_setting(pipe_ctx, + false, false, &settings); + else + write_i2c_default_retimer_setting(pipe_ctx, + false, false); + } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) { + /* PI3EQX1204, Redriver settings */ + write_i2c_redriver_setting(pipe_ctx, false); + } + } core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index eac7186e4f08..b7952f39f3fc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -93,10 +93,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case FAMILY_RV: dc_version = DCN_VERSION_1_0; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) dc_version = DCN_VERSION_1_01; -#endif break; #endif default: @@ -147,9 +145,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc, #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: -#endif res_pool = dcn10_create_resource_pool(init_data, dc); break; #endif @@ -1184,24 +1180,27 @@ static int acquire_first_split_pipe( int i; for (i = 0; i < pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - - if (pipe_ctx->top_pipe && - pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) { - pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe; - if (pipe_ctx->bottom_pipe) - pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe; - - memset(pipe_ctx, 0, sizeof(*pipe_ctx)); - pipe_ctx->stream_res.tg = pool->timing_generators[i]; - pipe_ctx->plane_res.hubp = 
pool->hubps[i]; - pipe_ctx->plane_res.ipp = pool->ipps[i]; - pipe_ctx->plane_res.dpp = pool->dpps[i]; - pipe_ctx->stream_res.opp = pool->opps[i]; - pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst; - pipe_ctx->pipe_idx = i; - - pipe_ctx->stream = stream; + struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i]; + + if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) && + split_pipe->top_pipe->plane_state == split_pipe->plane_state) { + split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe; + if (split_pipe->bottom_pipe) + split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe; + + if (split_pipe->top_pipe->plane_state) + resource_build_scaling_params(split_pipe->top_pipe); + + memset(split_pipe, 0, sizeof(*split_pipe)); + split_pipe->stream_res.tg = pool->timing_generators[i]; + split_pipe->plane_res.hubp = pool->hubps[i]; + split_pipe->plane_res.ipp = pool->ipps[i]; + split_pipe->plane_res.dpp = pool->dpps[i]; + split_pipe->stream_res.opp = pool->opps[i]; + split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst; + split_pipe->pipe_idx = i; + + split_pipe->stream = stream; return i; } } @@ -1647,46 +1646,6 @@ static int acquire_first_free_pipe( return -1; } -static struct stream_encoder *find_first_free_match_stream_enc_for_link( - struct resource_context *res_ctx, - const struct resource_pool *pool, - struct dc_stream_state *stream) -{ - int i; - int j = -1; - struct dc_link *link = stream->link; - - for (i = 0; i < pool->stream_enc_count; i++) { - if (!res_ctx->is_stream_enc_acquired[i] && - pool->stream_enc[i]) { - /* Store first available for MST second display - * in daisy chain use case */ - j = i; - if (pool->stream_enc[i]->id == - link->link_enc->preferred_engine) - return pool->stream_enc[i]; - } - } - - /* - * below can happen in cases when stream encoder is acquired: - * 1) for second MST display in chain, so preferred engine already - * acquired; - * 2) for another link, which preferred engine already acquired by any - * MST 
configuration. - * - * If signal is of DP type and preferred engine not found, return last available - * - * TODO - This is just a patch up and a generic solution is - * required for non DP connectors. - */ - - if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) - return pool->stream_enc[j]; - - return NULL; -} - static struct audio *find_first_free_audio( struct resource_context *res_ctx, const struct resource_pool *pool, @@ -1998,7 +1957,7 @@ enum dc_status resource_map_pool_resources( pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; pipe_ctx->stream_res.stream_enc = - find_first_free_match_stream_enc_for_link( + dc->res_pool->funcs->find_first_free_match_stream_enc_for_link( &context->res_ctx, pool, stream); if (!pipe_ctx->stream_res.stream_enc) @@ -2354,7 +2313,18 @@ static void set_avi_info_frame( break; } } + /* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/ hdmi_info.bits.VIC0_VIC7 = vic; + if (vic >= 128) + hdmi_info.bits.header.version = 3; + /* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1), + * the Source shall use 20 AVI InfoFrame Version 4 + */ + if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED && + hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) { + hdmi_info.bits.header.version = 4; + hdmi_info.bits.header.length = 14; + } /* pixel repetition * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel @@ -2373,12 +2343,19 @@ static void set_avi_info_frame( hdmi_info.bits.bar_right = (stream->timing.h_total - stream->timing.h_border_right + 1); + /* Additional Colorimetry Extension + * Used in conduction with C0-C1 and EC0-EC2 + * 0 = DCI-P3 RGB (D65) + * 1 = DCI-P3 RGB (theater) + */ + hdmi_info.bits.ACE0_ACE3 = 0; + /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ check_sum = &hdmi_info.packet_raw_data.sb[0]; - *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; + *check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version; - for (byte_index 
= 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) + for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++) *check_sum += hdmi_info.packet_raw_data.sb[byte_index]; /* one byte complement */ @@ -2425,21 +2402,6 @@ static void set_spd_info_packet( *info_packet = stream->vrr_infopacket; } -static void set_dp_sdp_info_packet( - struct dc_info_packet *info_packet, - struct dc_stream_state *stream) -{ - /* SPD info packet for custom sdp message */ - - /* Return if false. If true, - * set the corresponding bit in the info packet - */ - if (!stream->dpsdp_infopacket.valid) - return; - - *info_packet = stream->dpsdp_infopacket; -} - static void set_hdr_static_info_packet( struct dc_info_packet *info_packet, struct dc_stream_state *stream) @@ -2495,7 +2457,6 @@ void dc_resource_state_copy_construct( if (cur_pipe->bottom_pipe) cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx]; - } for (i = 0; i < dst_ctx->stream_count; i++) { @@ -2536,7 +2497,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) info->spd.valid = false; info->hdrsmd.valid = false; info->vsc.valid = false; - info->dpsdp.valid = false; signal = pipe_ctx->stream->signal; @@ -2556,8 +2516,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx) set_spd_info_packet(&info->spd, pipe_ctx->stream); set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream); - - set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream); } patch_gamut_packet_checksum(&info->gamut); @@ -2644,6 +2602,10 @@ bool pipe_need_reprogram( if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream)) return true; + if (false == pipe_ctx_old->stream->link->link_state_valid && + false == pipe_ctx_old->stream->dpms_off) + return true; + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 96e97d25d639..b723ffc8ea25 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ 
b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -47,8 +47,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink) if (dc_is_dvi_signal(stream->signal)) { if (stream->ctx->dc->caps.dual_link_dvi && - (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && - sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) + (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK && + sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK; else stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK; @@ -371,42 +371,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) return 0; } -static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx, - const uint8_t *custom_sdp_message, - unsigned int sdp_message_size) -{ - uint8_t i; - struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; - - /* set valid info */ - info->dpsdp.valid = true; - - /* set sdp message header */ - info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */ - info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */ - info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */ - info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */ - - /* set sdp message data */ - for (i = 0; i < 32; i++) - info->dpsdp.sb[i] = (custom_sdp_message[i+4]); - -} - -static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx) -{ - struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame; - - /* in-valid info */ - info->dpsdp.valid = false; -} - bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, const uint8_t *custom_sdp_message, unsigned int sdp_message_size) { int i; - struct dc *core_dc; + struct dc *dc; struct resource_context *res_ctx; if (stream == NULL) { @@ -414,8 +384,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, return false; } - core_dc = stream->ctx->dc; - res_ctx = &core_dc->current_state->res_ctx; + 
dc = stream->ctx->dc; + res_ctx = &dc->current_state->res_ctx; for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; @@ -423,11 +393,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream, if (pipe_ctx->stream != stream) continue; - build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size); - - core_dc->hwss.update_info_frame(pipe_ctx); + if (dc->hwss.send_immediate_sdp_message != NULL) + dc->hwss.send_immediate_sdp_message(pipe_ctx, + custom_sdp_message, + sdp_message_size); + else + DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n", + __func__); - invalid_dp_sdp_info_frame(pipe_ctx); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 44e4b0465587..566111ff463e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.27" +#define DC_VER "3.2.31" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -205,6 +205,7 @@ struct dc_config { bool disable_fractional_pwm; bool allow_seamless_boot_optimization; bool power_down_display_on_boot; + bool edp_not_connected; }; enum visual_confirm { @@ -540,12 +541,14 @@ struct dc_plane_status { union surface_update_flags { struct { + uint32_t addr_update:1; /* Medium updates */ uint32_t dcc_change:1; uint32_t color_space_change:1; uint32_t horizontal_mirror_change:1; uint32_t per_pixel_alpha_change:1; uint32_t global_alpha_change:1; + uint32_t sdr_white_level:1; uint32_t rotation_change:1; uint32_t swizzle_change:1; uint32_t scaling_change:1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 5e6c5eff49cf..2d0acf109360 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -297,7 +297,7 @@ void generic_reg_wait(const struct dc_context *ctx, int i; /* 
something is terribly wrong if time out is > 200ms. (5Hz) */ - ASSERT(delay_between_poll_us * time_out_num_tries <= 200000); + ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000); for (i = 0; i <= time_out_num_tries; i++) { if (i) { diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index da55d623647a..c91b8aad78c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -534,6 +534,7 @@ enum dc_color_space { COLOR_SPACE_DOLBYVISION, COLOR_SPACE_APPCTRL, COLOR_SPACE_CUSTOMPOINTS, + COLOR_SPACE_YCBCR709_BLACK, }; enum dc_dither_option { diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 7b9429e30d82..094009127e25 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -75,6 +75,7 @@ struct dc_link { enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */ bool is_hpd_filter_disabled; bool dp_ss_off; + bool link_state_valid; /* caps is the same as reported_link_cap. link_traing use * reported_link_cap. Will clean up. 
TODO diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6c2a3d9a4c2e..92a670894c05 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -104,7 +104,7 @@ struct dc_context { #define DC_MAX_EDID_BUFFER_SIZE 1024 -#define EDID_BLOCK_SIZE 128 +#define DC_EDID_BLOCK_SIZE 128 #define MAX_SURFACE_NUM 4 #define NUM_PIXEL_FORMATS 10 diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index da96229db53a..2959c3c9390b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -473,6 +473,8 @@ void dce_abm_destroy(struct abm **abm) { struct dce_abm *abm_dce = TO_DCE_ABM(*abm); + abm_dce->base.funcs->set_abm_immediate_disable(*abm); + kfree(abm_dce); *abm = NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 963686380738..6b2e207777f0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -241,6 +241,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state( return low_req_clk; } +/* TODO: remove use the two broken down functions */ static int dce_set_clock( struct clk_mgr *clk_mgr, int requested_clk_khz) @@ -336,6 +337,75 @@ int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz) return actual_clock; } +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct bp_set_dce_clock_parameters dce_clk_params; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + struct dc *core_dc = clk_mgr->ctx->dc; + struct dmcu *dmcu = core_dc->res_pool->dmcu; + int actual_clock = requested_clk_khz; + /* Prepare to program display clock*/ + memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + + /* Make sure requested clock isn't lower than minimum 
threshold*/ + if (requested_clk_khz > 0) + requested_clk_khz = max(requested_clk_khz, + clk_mgr_dce->dentist_vco_freq_khz / 62); + + dce_clk_params.target_clock_frequency = requested_clk_khz; + dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + actual_clock = dce_clk_params.target_clock_frequency; + + /* + * from power down, we need mark the clock state as ClocksStateNominal + * from HWReset, so when resume we will call pplib voltage regulator. + */ + if (requested_clk_khz == 0) + clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + + + if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { + if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock) + dmcu->funcs->set_psr_wait_loop(dmcu, + actual_clock / 1000 / 7); + } + } + + clk_mgr_dce->dfs_bypass_disp_clk = actual_clock; + return actual_clock; + +} + +int dce112_set_dprefclk(struct clk_mgr *clk_mgr) +{ + struct bp_set_dce_clock_parameters dce_clk_params; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + + memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + + /*Program DP ref Clock*/ + /*VBIOS will determine DPREFCLK frequency, so we don't set it*/ + dce_clk_params.target_clock_frequency = 0; + dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; + if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev)) + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = + (dce_clk_params.pll_id == + CLOCK_SOURCE_COMBO_DISPLAY_PLL0); + else + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + + /* Returns the dp_refclk that was set */ + return dce_clk_params.target_clock_frequency; +} + static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce) { struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; @@ 
-782,22 +852,22 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr, dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); } -static const struct clk_mgr_funcs dce120_funcs = { +static struct clk_mgr_funcs dce120_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dce12_update_clocks }; -static const struct clk_mgr_funcs dce112_funcs = { +static struct clk_mgr_funcs dce112_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce112_update_clocks }; -static const struct clk_mgr_funcs dce110_funcs = { +static struct clk_mgr_funcs dce110_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce11_update_clocks, }; -static const struct clk_mgr_funcs dce_funcs = { +static struct clk_mgr_funcs dce_funcs = { .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, .update_clocks = dce_update_clocks }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index c8f8c442142a..cca0c95d8cc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -39,6 +39,11 @@ #define CLK_COMMON_REG_LIST_DCN_BASE() \ SR(DENTIST_DISPCLK_CNTL) +#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \ + .MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \ + .MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \ + .MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67 + #define CLK_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix @@ -50,23 +55,39 @@ CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\ CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh) +#define CLK_MASK_SH_LIST_RV1(mask_sh) \ + CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\ + CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh), + + #define CLK_REG_FIELD_LIST(type) \ type DPREFCLK_SRC_SEL; \ type 
DENTIST_DPREFCLK_WDIVIDER; \ type DENTIST_DISPCLK_WDIVIDER; \ type DENTIST_DISPCLK_CHG_DONE; +#define VBIOS_SMU_REG_FIELD_LIST(type) \ + type CONTENT; + struct clk_mgr_shift { CLK_REG_FIELD_LIST(uint8_t) + VBIOS_SMU_REG_FIELD_LIST(uint32_t) }; struct clk_mgr_mask { CLK_REG_FIELD_LIST(uint32_t) + VBIOS_SMU_REG_FIELD_LIST(uint32_t) }; struct clk_mgr_registers { uint32_t DPREFCLK_CNTL; uint32_t DENTIST_DISPCLK_CNTL; + + uint32_t MP1_SMN_C2PMSG_67; + uint32_t MP1_SMN_C2PMSG_83; + uint32_t MP1_SMN_C2PMSG_91; }; struct state_dependent_clocks { @@ -168,6 +189,8 @@ void dce110_fill_display_configs( struct dm_pp_display_configuration *pp_display_cfg); int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz); +int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz); +int dce112_set_dprefclk(struct clk_mgr *clk_mgr); struct clk_mgr *dce_clk_mgr_create( struct dc_context *ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f70437aae8e0..df422440845b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -183,8 +183,8 @@ static bool calculate_fb_and_fractional_fb_divider( *RETURNS: * It fills the PLLSettings structure with PLL Dividers values * if calculated values are within required tolerance -* It returns - true if eror is within tolerance -* - false if eror is not within tolerance +* It returns - true if error is within tolerance +* - false if error is not within tolerance */ static bool calc_fb_divider_checking_tolerance( struct calc_pll_clock_source *calc_pll_cs, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index cd26161bcc4d..526aab438374 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -268,6 +268,8 @@ static bool setup_engine( struct dce_i2c_hw *dce_i2c_hw) { 
uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; + /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/ + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1); if (dce_i2c_hw->setup_limit != 0) i2c_setup_limit = dce_i2c_hw->setup_limit; @@ -322,8 +324,6 @@ static void release_engine( set_speed(dce_i2c_hw, dce_i2c_hw->original_speed); - /* Release I2C */ - REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1); /* Reset HW engine */ { @@ -343,6 +343,9 @@ static void release_engine( /* HW I2c engine - clock gating feature */ if (!dce_i2c_hw->engine_keep_power_up_count) REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0); + /* Release I2C after reset, so HW or DMCU could use it */ + REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1, + DC_I2C_SW_USE_I2C_REG_REQ, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h index 575500755b2e..f718e3d396f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h @@ -105,6 +105,7 @@ enum { I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\ I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\ + I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\ @@ -146,6 +147,7 @@ struct dce_i2c_shift { uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY; uint8_t DC_I2C_SW_DONE_USING_I2C_REG; + uint8_t DC_I2C_SW_USE_I2C_REG_REQ; uint8_t DC_I2C_NO_QUEUED_SW_GO; uint8_t DC_I2C_SW_PRIORITY; uint8_t DC_I2C_SOFT_RESET; @@ -184,6 +186,7 @@ struct dce_i2c_mask { uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY; uint32_t 
DC_I2C_DDC1_INTRA_BYTE_DELAY; uint32_t DC_I2C_SW_DONE_USING_I2C_REG; + uint32_t DC_I2C_SW_USE_I2C_REG_REQ; uint32_t DC_I2C_NO_QUEUED_SW_GO; uint32_t DC_I2C_SW_PRIORITY; uint32_t DC_I2C_SOFT_RESET; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 14309fe6f2e6..61fe2596fdb3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -418,6 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; case COLOR_SPACE_YCBCR709: case COLOR_SPACE_YCBCR709_LIMITED: + case COLOR_SPACE_YCBCR709_BLACK: misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_ycbcr = 1; /*bt709*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index e938bf9986d3..d7a531e9700f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -867,13 +867,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s return DC_FAIL_SURFACE_VALIDATE; } +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * below can happen in cases when stream encoder is acquired: + * 1) for second MST display in chain, so preferred engine already + * acquired; + * 2) for another link, which preferred engine already acquired by 
any + * MST configuration. + * + * If signal is of DP type and preferred engine not found, return last available + * + * TODO - This is just a patch up and a generic solution is + * required for non DP connectors. + */ + + if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT) + return pool->stream_enc[j]; + + return NULL; +} + static const struct resource_funcs dce100_res_pool_funcs = { .destroy = dce100_destroy_resource_pool, .link_enc_create = dce100_link_encoder_create, .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - .validate_global = dce100_validate_global + .validate_global = dce100_validate_global, + .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; static bool construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h index 2f366d66635d..fecab7c560f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h @@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx( struct dc_state *new_ctx, struct dc_stream_state *dc_stream); +struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* DCE100_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 7ac50ab1b762..69f215967af3 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -242,6 +242,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params, prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED; switch (plane_state->format) { + case 
SURFACE_PIXEL_FORMAT_GRPH_RGB565: + prescale_params->scale = 0x2082; + break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: prescale_params->scale = 0x2020; @@ -1296,6 +1299,11 @@ static enum dc_status dce110_enable_stream_timing( pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, + 0, + 0, + 0, + 0, + pipe_ctx->stream->signal, true); } @@ -1488,10 +1496,11 @@ static void disable_vga_and_power_gate_all_controllers( } } -static struct dc_link *get_link_for_edp(struct dc *dc) +static struct dc_link *get_edp_link(struct dc *dc) { int i; + // report any eDP links, even unconnected DDI's for (i = 0; i < dc->link_count; i++) { if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) return dc->links[i]; @@ -1499,23 +1508,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc) return NULL; } -static struct dc_link *get_link_for_edp_to_turn_off( +static struct dc_link *get_edp_link_with_sink( struct dc *dc, struct dc_state *context) { int i; struct dc_link *link = NULL; - /* check if eDP panel is suppose to be set mode, if yes, no need to disable */ - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { - if (context->streams[i]->dpms_off == true) - return context->streams[i]->sink->link; - else - return NULL; - } - } - /* check if there is an eDP panel not in use */ for (i = 0; i < dc->link_count; i++) { if (dc->links[i]->local_sink && @@ -1538,59 +1537,53 @@ static struct dc_link *get_link_for_edp_to_turn_off( void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) { int i; - struct dc_link *edp_link_to_turnoff = NULL; - struct dc_link *edp_link = get_link_for_edp(dc); - bool can_edp_fast_boot_optimize = false; - bool apply_edp_fast_boot_optimization = false; + struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context); + struct dc_link *edp_link = get_edp_link(dc); + bool can_apply_edp_fast_boot = false; bool 
can_apply_seamless_boot = false; - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->apply_seamless_boot_optimization) { - can_apply_seamless_boot = true; - break; - } - } - if (dc->hwss.init_pipes) dc->hwss.init_pipes(dc, context); - if (edp_link) { - /* this seems to cause blank screens on DCE8 */ - if ((dc->ctx->dce_version == DCE_VERSION_8_0) || - (dc->ctx->dce_version == DCE_VERSION_8_1) || - (dc->ctx->dce_version == DCE_VERSION_8_3)) - can_edp_fast_boot_optimize = false; - else - can_edp_fast_boot_optimize = - edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); + // Check fastboot support, disable on DCE8 because of blank screens + if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 && + dc->ctx->dce_version != DCE_VERSION_8_1 && + dc->ctx->dce_version != DCE_VERSION_8_3) { + + // enable fastboot if backend is enabled on eDP + if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) { + /* Find eDP stream and set optimization flag */ + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { + context->streams[i]->apply_edp_fast_boot_optimization = true; + can_apply_edp_fast_boot = true; + break; + } + } + } } - if (can_edp_fast_boot_optimize) - edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context); - - /* if OS doesn't light up eDP and eDP link is available, we want to disable - * If resume from S4/S5, should optimization. 
- */ - if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) { - /* Find eDP stream and set optimization flag */ - for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { - context->streams[i]->apply_edp_fast_boot_optimization = true; - apply_edp_fast_boot_optimization = true; - } + // Check seamless boot support + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->apply_seamless_boot_optimization) { + can_apply_seamless_boot = true; + break; } } - if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) { - if (edp_link_to_turnoff) { + /* eDP should not have stream in resume from S4 and so even with VBios post + * it should get turned off + */ + if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) { + if (edp_link_with_sink) { /*turn off backlight before DP_blank and encoder powered down*/ - dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); + dc->hwss.edp_backlight_control(edp_link_with_sink, false); } /*resume from S3, no vbios posting, no need to power down again*/ power_down_all_hw_blocks(dc); disable_vga_and_power_gate_all_controllers(dc); - if (edp_link_to_turnoff) - dc->hwss.edp_power_control(edp_link_to_turnoff, false); + if (edp_link_with_sink) + dc->hwss.edp_power_control(edp_link_with_sink, false); } bios_set_scratch_acc_mode_change(dc->ctx->dc_bios); } @@ -2030,8 +2023,10 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->stream == NULL) continue; - if (pipe_ctx->stream == pipe_ctx_old->stream) + if (pipe_ctx->stream == pipe_ctx_old->stream && + pipe_ctx->stream->link->link_state_valid) { continue; + } if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) continue; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index dcd04e9ea76b..f982c8b196cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1097,6 +1097,11 @@ static struct pipe_ctx *dce110_acquire_underlay( pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg, &stream->timing, + 0, + 0, + 0, + 0, + pipe_ctx->stream->signal, false); pipe_ctx->stream_res.tg->funcs->enable_advanced_request( @@ -1129,6 +1134,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + static const struct resource_funcs dce110_res_pool_funcs = { .destroy = dce110_destroy_resource_pool, @@ -1137,7 +1174,8 @@ static const struct resource_funcs dce110_res_pool_funcs = { .validate_plane = dce110_validate_plane, .acquire_idle_pipe_for_layer = dce110_acquire_underlay, .add_stream_to_ctx = dce110_add_stream_to_ctx, - .validate_global = dce110_validate_global + .validate_global = dce110_validate_global, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h index e5f168c1f8c8..aa4531e0800e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h +++ 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h @@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool( struct dc *dc, struct hw_asic_id asic_id); +struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* __DC_RESOURCE_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index 1b2fe0df347f..5f7c2c5641c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg, void dce110_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h index 734d4965dab1..768ccf27ada9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h @@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg, void dce110_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios); bool dce110_tg_is_blanked(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index a3cef60380ed..a13a2f58944e 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg, static void dce110_timing_generator_v_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index a480b15f6885..cdf759b0f5f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -993,7 +993,8 @@ static const struct resource_funcs dce112_res_pool_funcs = { .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx, - .validate_global = dce112_validate_global + .validate_global = dce112_validate_global, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 6d49c7143c67..9e6a5d84b0a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -480,7 +480,7 @@ static const struct dc_debug_options debug_defaults = { .disable_clock_gate = true, }; -struct clock_source *dce120_clock_source_create( +static struct clock_source *dce120_clock_source_create( struct dc_context *ctx, struct dc_bios *bios, enum clock_source_id id, @@ -503,14 +503,14 @@ struct clock_source *dce120_clock_source_create( return NULL; } -void dce120_clock_source_destroy(struct clock_source **clk_src) +static 
void dce120_clock_source_destroy(struct clock_source **clk_src) { kfree(TO_DCE110_CLK_SRC(*clk_src)); *clk_src = NULL; } -bool dce120_hw_sequencer_create(struct dc *dc) +static bool dce120_hw_sequencer_create(struct dc *dc) { /* All registers used by dce11.2 match those in dce11 in offset and * structure @@ -837,7 +837,8 @@ static const struct resource_funcs dce120_res_pool_funcs = { .link_enc_create = dce120_link_encoder_create, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, - .add_stream_to_ctx = dce112_add_stream_to_ctx + .add_stream_to_ctx = dce112_add_stream_to_ctx, + .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link }; static void bw_calcs_data_update_from_pplib(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 04b866f0fa1f..098e56962f2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg, CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr); } -void dce120_tg_program_timing(struct timing_generator *tg, +static void dce120_tg_program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (use_vbios) @@ -1109,6 +1114,92 @@ static bool dce120_arm_vert_intr( return true; } + +static bool dce120_is_tg_enabled(struct timing_generator *tg) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + uint32_t value, field; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL, + tg110->offsets.crtc); + field = get_reg_field_value(value, CRTC0_CRTC_CONTROL, + CRTC_CURRENT_MASTER_EN_STATE); + + return field == 1; +} + +static 
bool dce120_configure_crc(struct timing_generator *tg, + const struct crc_params *params) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + + /* Cannot configure crc on a CRTC that is disabled */ + if (!dce120_is_tg_enabled(tg)) + return false; + + /* First, disable CRC before we configure it. */ + dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc, 0); + + if (!params->enable) + return true; + + /* Program frame boundaries */ + /* Window A x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL, + CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start, + CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end); + + /* Window A y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL, + CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start, + CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end); + + /* Window B x axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL, + CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start, + CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end); + + /* Window B y axis start and end. */ + CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL, + CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start, + CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end); + + /* Set crc mode and selection, and enable. Only using CRC0*/ + CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL, + CRTC_CRC_EN, params->continuous_mode ? 
1 : 0, + CRTC_CRC0_SELECT, params->selection, + CRTC_CRC_EN, 1); + + return true; +} + +static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr, + uint32_t *g_y, uint32_t *b_cb) +{ + struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg); + uint32_t value, field; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL, + tg110->offsets.crtc); + field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN); + + /* Early return if CRC is not enabled for this CRTC */ + if (!field) + return false; + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG, + tg110->offsets.crtc); + *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR); + *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y); + + value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B, + tg110->offsets.crtc); + *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB); + + return true; +} + static const struct timing_generator_funcs dce120_tg_funcs = { .validate_timing = dce120_tg_validate_timing, .program_timing = dce120_tg_program_timing, @@ -1140,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = { .set_static_screen_control = dce120_timing_generator_set_static_screen_control, .set_test_pattern = dce120_timing_generator_set_test_pattern, .arm_vert_intr = dce120_arm_vert_intr, + .is_tg_enabled = dce120_is_tg_enabled, + .configure_crc = dce120_configure_crc, + .get_crc = dce120_get_crc, }; diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 27d0cc394963..2c21135a8510 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -880,7 +880,8 @@ static const struct resource_funcs dce80_res_pool_funcs = { .validate_bandwidth = dce80_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, - 
.validate_global = dce80_validate_global + .validate_global = dce80_validate_global, + .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link }; static bool dce80_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c index 8b5ce557ee71..397e7f94e1e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c @@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz) static void program_timing(struct timing_generator *tg, const struct dc_crtc_timing *timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { if (!use_vbios) program_pix_dur(tg, timing->pix_clk_100hz); - dce110_tg_program_timing(tg, timing, use_vbios); + dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios); } static void dce80_timing_generator_enable_advanced_request( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index 2b2de1d913c9..9f2ffce10e12 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -27,6 +27,7 @@ #include "reg_helper.h" #include "core_types.h" +#include "dal_asic_id.h" #define TO_DCE_CLK_MGR(clocks)\ container_of(clocks, struct dce_clk_mgr, base) @@ -91,13 +92,18 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) { + int i; struct dc *dc = clk_mgr->ctx->dc; int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks); bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; - int i; /* set disp clk to dpp clk threshold */ 
- dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); + + if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) { + clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold); + clk_mgr->funcs->set_dprefclk(clk_mgr); + } else + dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); /* update request dpp clk division option */ for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -113,8 +119,13 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo } /* If target clk not same as dppclk threshold, set to target clock */ - if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) - dce112_set_clock(clk_mgr, new_clocks->dispclk_khz); + if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) { + if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) { + clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz); + clk_mgr->funcs->set_dprefclk(clk_mgr); + } else + dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); + } clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; @@ -242,7 +253,62 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, } } } -static const struct clk_mgr_funcs dcn1_funcs = { + +#define VBIOSSMC_MSG_SetDispclkFreq 0x4 +#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 + +int dcn10_set_dispclk(struct clk_mgr *clk_mgr_base, int requested_dispclk_khz) +{ + int actual_dispclk_set_khz = -1; + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base); + + /* First clear response register */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_91, 0); + REG_WRITE(MP1_SMN_C2PMSG_91, 0); + + /* Set the parameter register for the SMU message, unit is Mhz */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000); + REG_WRITE(MP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000); + + /* Trigger the message transaction by writing the message ID */ + //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq); + REG_WRITE(MP1_SMN_C2PMSG_67, 
VBIOSSMC_MSG_SetDispclkFreq); + + REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + + /* Actual dispclk set is returned in the parameter register */ + actual_dispclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000; + + return actual_dispclk_set_khz; + +} + +int dcn10_set_dprefclk(struct clk_mgr *clk_mgr_base) +{ + int actual_dprefclk_set_khz = -1; + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base); + + REG_WRITE(MP1_SMN_C2PMSG_91, 0); + + /* Set the parameter register for the SMU message */ + REG_WRITE(MP1_SMN_C2PMSG_83, clk_mgr_dce->dprefclk_khz / 1000); + + /* Trigger the message transaction by writing the message ID */ + REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDprefclkFreq); + + /* Wait for SMU response */ + REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000); + + actual_dprefclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000; + + return actual_dprefclk_set_khz; +} + +int (*set_dispclk)(struct pp_smu *pp_smu, int dispclk); + +int (*set_dprefclk)(struct pp_smu *pp_smu); + +static struct clk_mgr_funcs dcn1_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn1_update_clocks }; @@ -266,8 +332,8 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx) clk_mgr_dce->dprefclk_ss_percentage = 0; clk_mgr_dce->dprefclk_ss_divider = 1000; clk_mgr_dce->ss_on_dprefclk = false; - clk_mgr_dce->dprefclk_khz = 600000; + if (bp->integrated_info) clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; if (clk_mgr_dce->dentist_vco_freq_khz == 0) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 0db2a6e96fc0..bf978831bb0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -263,20 +263,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub) DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1); } -void hubbub1_program_watermarks( +void 
hubbub1_program_urgent_watermarks( struct hubbub *hubbub, struct dcn_watermark_set *watermarks, unsigned int refclk_mhz, bool safe_to_lower) { struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); - /* - * Need to clamp to max of the register values (i.e. no wrap) - * for dcn1, all wm registers are 21-bit wide - */ uint32_t prog_wm_value; - /* Repeat for water mark set A, B, C and D. */ /* clock state A */ if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) { @@ -291,60 +286,14 @@ void hubbub1_program_watermarks( watermarks->a.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) { - if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { - hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) { - if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns - > 
hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = - watermarks->a.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = - watermarks->a.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - watermarks->a.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) { + hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.pte_meta_urgent_ns, prog_wm_value); } /* clock state B */ @@ -360,60 +309,14 @@ void hubbub1_program_watermarks( watermarks->b.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) { - if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { - hubbub1->watermarks.b.pte_meta_urgent_ns = 
watermarks->b.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) { - if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = - watermarks->b.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->b.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = - watermarks->b.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - 
watermarks->b.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) { + hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.pte_meta_urgent_ns, prog_wm_value); } /* clock state C */ @@ -429,60 +332,14 @@ void hubbub1_program_watermarks( watermarks->c.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) { - if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { - hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.pte_meta_urgent_ns, prog_wm_value); - } - } - - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) { - if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - 
REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } - - if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = - watermarks->c.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); - } - } - - if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns - > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { - hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = - watermarks->c.cstate_pstate.pstate_change_ns; - prog_wm_value = convert_and_clamp( - watermarks->c.cstate_pstate.pstate_change_ns, + if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) { + hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns, refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, - DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" - "HW register value = 0x%x\n\n", - watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + 
watermarks->c.pte_meta_urgent_ns, prog_wm_value); } /* clock state D */ @@ -498,48 +355,199 @@ void hubbub1_program_watermarks( watermarks->d.urgent_ns, prog_wm_value); } - if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) { - if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { - hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; - prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, - refclk_mhz, 0x1fffff); - REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.pte_meta_urgent_ns, prog_wm_value); - } + if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) { + hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns; + prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns, + refclk_mhz, 0x1fffff); + REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.pte_meta_urgent_ns, prog_wm_value); } +} - if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) { - if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns - > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { - hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, - DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); - } +void 
hubbub1_program_stutter_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t prog_wm_value; - if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns - > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { - hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = - watermarks->d.cstate_pstate.cstate_exit_ns; - prog_wm_value = convert_and_clamp( - watermarks->d.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); - REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, - DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); - DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" - "HW register value = 0x%x\n", - watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); - } + /* clock state A */ + if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns = + watermarks->a.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); + 
DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + + /* clock state B */ + if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); } + if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns = + watermarks->b.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + + /* clock state C */ + if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); + 
DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns = + watermarks->c.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->c.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + + /* clock state D */ + if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns + > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" + "HW register value = 0x%x\n", + watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value); + } + + if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns + > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) { + hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns = + watermarks->d.cstate_pstate.cstate_exit_ns; + prog_wm_value = convert_and_clamp( + watermarks->d.cstate_pstate.cstate_exit_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, + DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" 
+ "HW register value = 0x%x\n", + watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value); + } + +} + +void hubbub1_program_pstate_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + uint32_t prog_wm_value; + + /* clock state A */ + if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.a.cstate_pstate.pstate_change_ns = + watermarks->a.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + watermarks->a.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* clock state B */ + if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.b.cstate_pstate.pstate_change_ns = + watermarks->b.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + watermarks->b.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* clock state C */ + if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns + > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) { + hubbub1->watermarks.c.cstate_pstate.pstate_change_ns = + watermarks->c.cstate_pstate.pstate_change_ns; + prog_wm_value = convert_and_clamp( + 
watermarks->c.cstate_pstate.pstate_change_ns, + refclk_mhz, 0x1fffff); + REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, + DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); + DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" + "HW register value = 0x%x\n\n", + watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value); + } + + /* clock state D */ if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns > hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) { hubbub1->watermarks.d.cstate_pstate.pstate_change_ns = @@ -553,6 +561,22 @@ void hubbub1_program_watermarks( "HW register value = 0x%x\n\n", watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value); } +} + +void hubbub1_program_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower) +{ + struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub); + /* + * Need to clamp to max of the register values (i.e. no wrap) + * for dcn1, all wm registers are 21-bit wide + */ + hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); + hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower); REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL, DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz); @@ -903,9 +927,7 @@ void hubbub1_construct(struct hubbub *hubbub, hubbub1->masks = hubbub_mask; hubbub1->debug_test_index_pstate = 0x7; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ctx->dce_version == DCN_VERSION_1_01) hubbub1->debug_test_index_pstate = 0xB; -#endif } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index 85811b24a497..7c2559c9ae23 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub, 
const struct dcn_hubbub_shift *hubbub_shift, const struct dcn_hubbub_mask *hubbub_mask); +void hubbub1_program_urgent_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); +void hubbub1_program_stutter_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); +void hubbub1_program_pstate_watermarks( + struct hubbub *hubbub, + struct dcn_watermark_set *watermarks, + unsigned int refclk_mhz, + bool safe_to_lower); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 33d311cea28c..66bb0e7db25c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -658,16 +658,15 @@ static enum dc_status dcn10_enable_stream_timing( BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; } - pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - - pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal; pipe_ctx->stream_res.tg->funcs->program_timing( pipe_ctx->stream_res.tg, &stream->timing, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width, + pipe_ctx->stream->signal, true); #if 0 /* move to after enable_crtc */ @@ -1756,7 +1755,7 @@ static void dcn10_program_output_csc(struct dc *dc, bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && 
pipe_ctx->plane_state->visible) return true; if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe)) return true; @@ -1765,7 +1764,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible) return true; if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) return true; @@ -1774,7 +1773,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { - if (pipe_ctx->plane_state->visible) + if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible) return true; if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe)) return true; @@ -1920,7 +1919,7 @@ static uint16_t fixed_point_to_int_frac( return result; } -void build_prescale_params(struct dc_bias_and_scale *bias_and_scale, +void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale, const struct dc_plane_state *plane_state) { if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN @@ -1953,7 +1952,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state) plane_state->color_space); //set scale and bias registers - build_prescale_params(&bns_params, plane_state); + dcn10_build_prescale_params(&bns_params, plane_state); if (dpp->funcs->dpp_program_bias_and_scale) dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params); } @@ -2279,14 +2278,15 @@ static void program_all_pipe_in_tree( if (pipe_ctx->top_pipe == NULL) { bool blank = !is_pipe_tree_visible(pipe_ctx); - pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; - pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start; - pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset; - 
pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width; - pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal; - pipe_ctx->stream_res.tg->funcs->program_global_sync( - pipe_ctx->stream_res.tg); + pipe_ctx->stream_res.tg, + pipe_ctx->pipe_dlg_param.vready_offset, + pipe_ctx->pipe_dlg_param.vstartup_start, + pipe_ctx->pipe_dlg_param.vupdate_offset, + pipe_ctx->pipe_dlg_param.vupdate_width); + + pipe_ctx->stream_res.tg->funcs->set_vtg_params( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); @@ -2644,9 +2644,6 @@ static void dcn10_wait_for_mpcc_disconnect( res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; hubp->funcs->set_blank(hubp, true); - /*DC_LOG_ERROR(dc->ctx->logger, - "[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n", - i);*/ } } @@ -2790,7 +2787,6 @@ static void apply_front_porch_workaround( int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) { - struct timing_generator *optc = pipe_ctx->stream_res.tg; const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; struct dc_crtc_timing patched_crtc_timing; int vesa_sync_start; @@ -2813,7 +2809,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) * interlace_factor; vertical_line_start = asic_blank_end - - optc->dlg_otg_param.vstartup_start + 1; + pipe_ctx->pipe_dlg_param.vstartup_start + 1; return vertical_line_start; } @@ -2961,6 +2957,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx, } } +static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message( + pipe_ctx->stream_res.stream_enc, + custom_sdp_message, + sdp_message_size); + } +} + static const struct 
hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, .init_hw = dcn10_init_hw, @@ -2980,6 +2988,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .enable_timing_synchronization = dcn10_enable_timing_synchronization, .enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset, .update_info_frame = dce110_update_info_frame, + .send_immediate_sdp_message = dcn10_send_immediate_sdp_message, .enable_stream = dce110_enable_stream, .disable_stream = dce110_disable_stream, .unblank_stream = dcn10_unblank_stream, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 4b3b27a5d23b..ef94d6b15843 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream( int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); +void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale, + const struct dc_plane_state *plane_state); void lock_all_pipes(struct dc *dc, struct dc_state *context, bool lock); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 0126a44ba012..e25ae43f8d32 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -726,6 +726,8 @@ void dcn10_link_encoder_construct( enc10->base.features.flags.bits.IS_HBR3_CAPABLE = bp_cap_info.DP_HBR3_EN; enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; + enc10->base.features.flags.bits.DP_IS_USB_C = + bp_cap_info.DP_IS_USB_C; } else { DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", __func__, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 
0345d51e9d6f..533b0f3cf6c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -46,9 +46,7 @@ * This is a workaround for a bug that has existed since R5xx and has not been * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. */ -static void optc1_apply_front_porch_workaround( - struct timing_generator *optc, - struct dc_crtc_timing *timing) +static void apply_front_porch_workaround(struct dc_crtc_timing *timing) { if (timing->flags.INTERLACE == 1) { if (timing->v_front_porch < 2) @@ -60,24 +58,33 @@ static void optc1_apply_front_porch_workaround( } void optc1_program_global_sync( - struct timing_generator *optc) + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - if (optc->dlg_otg_param.vstartup_start == 0) { + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; + + if (optc1->vstartup_start == 0) { BREAK_TO_DEBUGGER(); return; } REG_SET(OTG_VSTARTUP_PARAM, 0, - VSTARTUP_START, optc->dlg_otg_param.vstartup_start); + VSTARTUP_START, optc1->vstartup_start); REG_SET_2(OTG_VUPDATE_PARAM, 0, - VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset, - VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width); + VUPDATE_OFFSET, optc1->vupdate_offset, + VUPDATE_WIDTH, optc1->vupdate_width); REG_SET(OTG_VREADY_PARAM, 0, - VREADY_OFFSET, optc->dlg_otg_param.vready_offset); + VREADY_OFFSET, optc1->vready_offset); } static void optc1_disable_stereo(struct timing_generator *optc) @@ -132,25 +139,32 @@ void optc1_setup_vertical_interrupt2( void optc1_program_timing( struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios) { 
struct dc_crtc_timing patched_crtc_timing; - uint32_t vesa_sync_start; uint32_t asic_blank_end; uint32_t asic_blank_start; uint32_t v_total; uint32_t v_sync_end; - uint32_t v_init, v_fp2; uint32_t h_sync_polarity, v_sync_polarity; uint32_t start_point = 0; uint32_t field_num = 0; uint32_t h_div_2; - int32_t vertical_line_start; struct optc *optc1 = DCN10TG_FROM_TG(optc); + optc1->signal = signal; + optc1->vready_offset = vready_offset; + optc1->vstartup_start = vstartup_start; + optc1->vupdate_offset = vupdate_offset; + optc1->vupdate_width = vupdate_width; patched_crtc_timing = *dc_crtc_timing; - optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + apply_front_porch_workaround(&patched_crtc_timing); /* Load horizontal timing */ @@ -163,24 +177,16 @@ void optc1_program_timing( OTG_H_SYNC_A_START, 0, OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width); - /* asic_h_blank_end = HsyncWidth + HbackPorch = - * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart - - * vesa.h_left_border - */ - vesa_sync_start = patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right + + /* blank_start = line end - front porch */ + asic_blank_start = patched_crtc_timing.h_total - patched_crtc_timing.h_front_porch; - asic_blank_end = patched_crtc_timing.h_total - - vesa_sync_start - + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.h_border_right - + patched_crtc_timing.h_addressable - patched_crtc_timing.h_border_left; - /* h_blank_start = v_blank_end + v_active */ - asic_blank_start = asic_blank_end + - patched_crtc_timing.h_border_left + - patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right; - REG_UPDATE_2(OTG_H_BLANK_START_END, OTG_H_BLANK_START, asic_blank_start, OTG_H_BLANK_END, asic_blank_end); @@ -212,24 +218,15 @@ void optc1_program_timing( OTG_V_SYNC_A_START, 0, OTG_V_SYNC_A_END, v_sync_end); - vesa_sync_start = patched_crtc_timing.v_addressable + - 
patched_crtc_timing.v_border_bottom + + /* blank_start = frame end - front porch */ + asic_blank_start = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; - asic_blank_end = (patched_crtc_timing.v_total - - vesa_sync_start - - patched_crtc_timing.v_border_top); - - /* v_blank_start = v_blank_end + v_active */ - asic_blank_start = asic_blank_end + - (patched_crtc_timing.v_border_top + - patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom); - - vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - v_fp2 = 0; - if (vertical_line_start < 0) - v_fp2 = -vertical_line_start; + /* blank_end = blank_start - active */ + asic_blank_end = asic_blank_start - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; REG_UPDATE_2(OTG_V_BLANK_START_END, OTG_V_BLANK_START, asic_blank_start, @@ -242,10 +239,9 @@ void optc1_program_timing( REG_UPDATE(OTG_V_SYNC_A_CNTL, OTG_V_SYNC_A_POL, v_sync_polarity); - v_init = asic_blank_start; - if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || - optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) { + if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT || + optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc1->signal == SIGNAL_TYPE_EDP) { start_point = 1; if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; @@ -253,13 +249,10 @@ void optc1_program_timing( /* Interlace */ if (REG(OTG_INTERLACE_CONTROL)) { - if (patched_crtc_timing.flags.INTERLACE == 1) { + if (patched_crtc_timing.flags.INTERLACE == 1) REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 1); - v_init = v_init / 2; - if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) - v_fp2 = v_fp2 / 2; - } else + else REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 0); } @@ -268,16 +261,18 @@ void optc1_program_timing( REG_UPDATE(CONTROL, VTG0_ENABLE, 0); - REG_UPDATE_2(CONTROL, 
- VTG0_FP2, v_fp2, - VTG0_VCOUNT_INIT, v_init); - /* original code is using VTG offset to address OTG reg, seems wrong */ REG_UPDATE_2(OTG_CONTROL, OTG_START_POINT_CNTL, start_point, OTG_FIELD_NUMBER_CNTL, field_num); - optc1_program_global_sync(optc); + optc->funcs->program_global_sync(optc, + vready_offset, + vstartup_start, + vupdate_offset, + vupdate_width); + + optc->funcs->set_vtg_params(optc, dc_crtc_timing); /* TODO * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 @@ -296,6 +291,48 @@ void optc1_program_timing( } +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct dc_crtc_timing patched_crtc_timing; + uint32_t asic_blank_end; + uint32_t v_init; + uint32_t v_fp2 = 0; + int32_t vertical_line_start; + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + + /* VCOUNT_INIT is the start of blank */ + v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch; + + /* end of blank = v_init - active */ + asic_blank_end = v_init - + patched_crtc_timing.v_border_bottom - + patched_crtc_timing.v_addressable - + patched_crtc_timing.v_border_top; + + /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */ + vertical_line_start = asic_blank_end - optc1->vstartup_start + 1; + if (vertical_line_start < 0) + v_fp2 = -vertical_line_start; + + /* Interlace */ + if (REG(OTG_INTERLACE_CONTROL)) { + if (patched_crtc_timing.flags.INTERLACE == 1) { + v_init = v_init / 2; + if ((optc1->vstartup_start/2)*2 > asic_blank_end) + v_fp2 = v_fp2 / 2; + } + } + + REG_UPDATE_2(CONTROL, + VTG0_FP2, v_fp2, + VTG0_VCOUNT_INIT, v_init); +} + void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1420,6 +1457,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .get_crc = 
optc1_get_crc, .configure_crc = optc1_configure_crc, + .set_vtg_params = optc1_set_vtg_params, }; void dcn10_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 4eb9a898c237..651b8caa4b9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -446,6 +446,12 @@ struct optc { uint32_t min_v_sync_width; uint32_t min_v_blank; uint32_t min_v_blank_interlace; + + int vstartup_start; + int vupdate_offset; + int vupdate_width; + int vready_offset; + enum signal_type signal; }; void dcn10_timing_generator_init(struct optc *optc); @@ -481,6 +487,11 @@ bool optc1_validate_timing( void optc1_program_timing( struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, bool use_vbios); void optc1_setup_vertical_interrupt0( @@ -495,7 +506,11 @@ void optc1_setup_vertical_interrupt2( uint32_t start_line); void optc1_program_global_sync( - struct timing_generator *optc); + struct timing_generator *optc, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); bool optc1_disable_crtc(struct timing_generator *optc); @@ -582,4 +597,7 @@ bool optc1_get_crc(struct timing_generator *optc, bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); +void optc1_set_vtg_params(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 7eccb54c421d..bfddd51294a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -29,7 +29,6 @@ #include "resource.h" #include 
"include/irq_service_interface.h" #include "dcn10_resource.h" - #include "dcn10_ipp.h" #include "dcn10_mpc.h" #include "irq/dcn10/irq_service_dcn10.h" @@ -153,9 +152,7 @@ enum dcn10_clk_src_array_id { DCN10_CLK_SRC_PLL2, DCN10_CLK_SRC_PLL3, DCN10_CLK_SRC_TOTAL, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3 -#endif }; /* begin ********************* @@ -445,7 +442,6 @@ static const struct bios_registers bios_regs = { HUBP_REG_LIST_DCN10(id)\ } - static const struct dcn_mi_registers hubp_regs[] = { hubp_regs(0), hubp_regs(1), @@ -461,7 +457,6 @@ static const struct dcn_mi_mask hubp_mask = { HUBP_MASK_SH_LIST_DCN10(_MASK) }; - static const struct dcn_hubbub_registers hubbub_reg = { HUBBUB_REG_LIST_DCN10(0) }; @@ -494,6 +489,27 @@ static const struct dce110_clk_src_mask cs_mask = { CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; + +#define mmMP1_SMN_C2PMSG_91 0x1629B +#define mmMP1_SMN_C2PMSG_83 0x16293 +#define mmMP1_SMN_C2PMSG_67 0x16283 + +#define MP1_SMN_C2PMSG_91__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_83__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_67__CONTENT_MASK 0xffffffffL +#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT 0x00000000 +#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT 0x00000000 +#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT 0x00000000 + + +static const struct clk_mgr_shift clk_mgr_shift = { + CLK_MASK_SH_LIST_RV1(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask = { + CLK_MASK_SH_LIST_RV1(_MASK) +}; + static const struct resource_caps res_cap = { .num_timing_generator = 4, .num_opp = 4, @@ -504,7 +520,6 @@ static const struct resource_caps res_cap = { .num_ddc = 4, }; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) static const struct resource_caps rv2_res_cap = { .num_timing_generator = 3, .num_opp = 3, @@ -514,7 +529,6 @@ static const struct resource_caps rv2_res_cap = { .num_pll = 3, .num_ddc = 3, }; -#endif static const struct dc_plane_cap plane_cap = { .type = DC_PLANE_TYPE_DCN_UNIVERSAL, @@ -1217,6 +1231,38 @@ 
static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan return result; } +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream) +{ + int i; + int j = -1; + struct dc_link *link = stream->link; + + for (i = 0; i < pool->stream_enc_count; i++) { + if (!res_ctx->is_stream_enc_acquired[i] && + pool->stream_enc[i]) { + /* Store first available for MST second display + * in daisy chain use case + */ + j = i; + if (pool->stream_enc[i]->id == + link->link_enc->preferred_engine) + return pool->stream_enc[i]; + } + } + + /* + * For CZ and later, we can allow DIG FE and BE to differ for all display types + */ + + if (j >= 0) + return pool->stream_enc[j]; + + return NULL; +} + static const struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; @@ -1229,7 +1275,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = { .validate_plane = dcn10_validate_plane, .validate_global = dcn10_validate_global, .add_stream_to_ctx = dcn10_add_stream_to_ctx, - .get_default_swizzle_mode = dcn10_get_default_swizzle_mode + .get_default_swizzle_mode = dcn10_get_default_swizzle_mode, + .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; static uint32_t read_pipe_fuses(struct dc_context *ctx) @@ -1252,11 +1299,9 @@ static bool construct( ctx->dc_bios->regs = &bios_regs; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (ctx->dce_version == DCN_VERSION_1_01) pool->base.res_cap = &rv2_res_cap; else -#endif pool->base.res_cap = &res_cap; pool->base.funcs = &dcn10_res_pool_funcs; @@ -1273,10 +1318,8 @@ static bool construct( /* max pipe num for ASIC before check pipe fuses */ pool->base.pipe_count = pool->base.res_cap->num_timing_generator; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) pool->base.pipe_count = 3; -#endif 
dc->caps.max_video_width = 3840; dc->caps.max_downscale_ratio = 200; dc->caps.i2c_speed_in_khz = 100; @@ -1309,26 +1352,17 @@ static bool construct( CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); -#ifdef CONFIG_DRM_AMD_DC_DCN1_01 if (dc->ctx->dce_version == DCN_VERSION_1_0) { pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = dcn10_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); } -#else - pool->base.clock_sources[DCN10_CLK_SRC_PLL3] = - dcn10_clock_source_create(ctx, ctx->dc_bios, - CLOCK_SOURCE_COMBO_PHY_PLL3, - &clk_src_regs[3], false); -#endif pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL; -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL; -#endif pool->base.dp_clock_source = dcn10_clock_source_create(ctx, ctx->dc_bios, @@ -1343,12 +1377,6 @@ static bool construct( goto fail; } } - pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); - if (pool->base.clk_mgr == NULL) { - dm_error("DC: failed to create display clock!\n"); - BREAK_TO_DEBUGGER(); - goto fail; - } pool->base.dmcu = dcn10_dmcu_create(ctx, &dmcu_regs, @@ -1374,7 +1402,6 @@ static bool construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) if (dc->ctx->dce_version == DCN_VERSION_1_01) { struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc; struct dcn_ip_params *dcn_ip = dc->dcn_ip; @@ -1385,7 +1412,6 @@ static bool construct( dcn_soc->dram_clock_change_latency = 23; dcn_ip->max_num_dpp = 3; } -#endif if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) { dc->dcn_soc->urgent_latency = 3; dc->debug.disable_dmcu = true; @@ -1410,6 +1436,13 @@ static bool construct( pool->base.pp_smu = dcn10_pp_smu_create(ctx); + pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); + if (pool->base.clk_mgr == NULL) { + dm_error("DC: failed to create display clock!\n"); 
+ BREAK_TO_DEBUGGER(); + goto fail; + } + if (!dc->debug.disable_pplib_clock_request) dcn_bw_update_from_pplib(dc); dcn_bw_sync_calcs_and_dml(dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h index 999c684a0b36..633025ccb870 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h @@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + #endif /* __DC_RESOURCE_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 8ee9f6dc1d62..ba71b5224e7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -415,6 +415,7 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_APPCTRL: case COLOR_SPACE_CUSTOMPOINTS: case COLOR_SPACE_UNKNOWN: + case COLOR_SPACE_YCBCR709_BLACK: /* do nothing */ break; } @@ -726,11 +727,9 @@ void enc1_stream_encoder_update_dp_info_packets( 3, /* packetIndex */ &info_frame->hdrsmd); - if (info_frame->dpsdp.valid) - enc1_update_generic_info_packet( - enc1, - 4,/* packetIndex */ - &info_frame->dpsdp); + /* packetIndex 4 is used for send immediate sdp message, and please + * use other packetIndex (such as 5,6) for other info packet + */ /* enable/disable transmission of packet(s). 
* If enabled, packet transmission begins on the next frame @@ -738,7 +737,101 @@ void enc1_stream_encoder_update_dp_info_packets( REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid); REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); - REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid); + + + /* This bit is the master enable bit. + * When enabling secondary stream engine, + * this master bit must also be set. + * This register shared with audio info frame. + * Therefore we need to enable master bit + * if at least on of the fields is not 0 + */ + value = REG_READ(DP_SEC_CNTL); + if (value) + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); +} + +void enc1_stream_encoder_send_immediate_sdp_message( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value = 0; + + /* TODOFPGA Figure out a proper number for max_retries polling for lock + * use 50 for now. + */ + uint32_t max_retries = 50; + + /* check if GSP4 is transmitted */ + REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, + 0, 10, max_retries); + + /* disable GSP4 transmitting */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0); + + /* transmit GSP4 at the earliest time in a frame */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1); + + /*we need turn on clock before programming AFMT block*/ + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + + /* check if HW reading GSP memory */ + REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, + 0, 10, max_retries); + + /* HW does is not reading GSP memory not reading too long -> + * something wrong. clear GPS memory access and notify? 
+ * hw SW is writing to GSP memory + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); + + /* use generic packet 4 for immediate sdp message */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, + AFMT_GENERIC_INDEX, 4); + + /* write generic packet header + * (4th byte is for GENERIC0 only) + */ + REG_SET_4(AFMT_GENERIC_HDR, 0, + AFMT_GENERIC_HB0, custom_sdp_message[0], + AFMT_GENERIC_HB1, custom_sdp_message[1], + AFMT_GENERIC_HB2, custom_sdp_message[2], + AFMT_GENERIC_HB3, custom_sdp_message[3]); + + /* write generic packet contents + * (we never use last 4 bytes) + * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers + */ + { + const uint32_t *content = + (const uint32_t *) &custom_sdp_message[4]; + + REG_WRITE(AFMT_GENERIC_0, *content++); + REG_WRITE(AFMT_GENERIC_1, *content++); + REG_WRITE(AFMT_GENERIC_2, *content++); + REG_WRITE(AFMT_GENERIC_3, *content++); + REG_WRITE(AFMT_GENERIC_4, *content++); + REG_WRITE(AFMT_GENERIC_5, *content++); + REG_WRITE(AFMT_GENERIC_6, *content++); + REG_WRITE(AFMT_GENERIC_7, *content); + } + + /* check whether GENERIC4 registers double buffer update in immediate mode + * is pending + */ + REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, + 0, 10, max_retries); + + /* atomically update double-buffered GENERIC4 registers in immediate mode + * (update immediately) + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC4_IMMEDIATE_UPDATE, 1); + + /* enable GSP4 transmitting */ + REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1); /* This bit is the master enable bit. 
* When enabling secondary stream engine, @@ -1462,6 +1555,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { enc1_stream_encoder_stop_hdmi_info_packets, .update_dp_info_packets = enc1_stream_encoder_update_dp_info_packets, + .send_immediate_sdp_message = + enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, .dp_blank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index e654c2f55971..a292b106a8b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -81,6 +81,7 @@ SRI(DP_MSE_RATE_UPDATE, DP, id), \ SRI(DP_PIXEL_FORMAT, DP, id), \ SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_SEC_CNTL2, DP, id), \ SRI(DP_STEER_FIFO, DP, id), \ SRI(DP_VID_M, DP, id), \ SRI(DP_VID_N, DP, id), \ @@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers { uint32_t AFMT_60958_1; uint32_t AFMT_60958_2; uint32_t DIG_FE_CNTL; + uint32_t DIG_FE_CNTL2; uint32_t DP_MSE_RATE_CNTL; uint32_t DP_MSE_RATE_UPDATE; uint32_t DP_PIXEL_FORMAT; uint32_t DP_SEC_CNTL; + uint32_t DP_SEC_CNTL2; uint32_t DP_STEER_FIFO; uint32_t DP_VID_M; uint32_t DP_VID_N; @@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers { SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\ @@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers { 
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ @@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers { SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ @@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers { SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\ SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ @@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers { type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\ type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\ type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ @@ -312,6 +323,7 @@ struct 
dcn10_stream_enc_registers { type AFMT_GENERIC2_FRAME_UPDATE;\ type AFMT_GENERIC3_FRAME_UPDATE;\ type AFMT_GENERIC4_FRAME_UPDATE;\ + type AFMT_GENERIC4_IMMEDIATE_UPDATE;\ type AFMT_GENERIC5_FRAME_UPDATE;\ type AFMT_GENERIC6_FRAME_UPDATE;\ type AFMT_GENERIC7_FRAME_UPDATE;\ @@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers { type DP_SEC_GSP5_ENABLE;\ type DP_SEC_GSP6_ENABLE;\ type DP_SEC_GSP7_ENABLE;\ + type DP_SEC_GSP7_PPS;\ type DP_SEC_GSP7_SEND;\ + type DP_SEC_GSP4_SEND;\ + type DP_SEC_GSP4_SEND_PENDING;\ + type DP_SEC_GSP4_LINE_NUM;\ + type DP_SEC_GSP4_SEND_ANY_LINE;\ type DP_SEC_MPG_ENABLE;\ type DP_VID_STREAM_DIS_DEFER;\ type DP_VID_STREAM_ENABLE;\ @@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets( struct stream_encoder *enc, const struct encoder_info_frame *info_frame); +void enc1_stream_encoder_send_immediate_sdp_message( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void enc1_stream_encoder_stop_dp_info_packets( struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index 4fc4208d1472..9f7ebf6a4e40 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -80,6 +80,7 @@ struct pp_smu_funcs_rv { /* PPSMC_MSG_SetDisplayCount * 0 triggers S0i2 optimization */ + void (*set_display_count)(struct pp_smu *pp, int count); /* reader and writer WM's are sent together as part of one table*/ @@ -115,7 +116,6 @@ struct pp_smu_funcs_rv { /* PME w/a */ void (*set_pme_wa_enable)(struct pp_smu *pp); - }; struct pp_smu_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index c5b791d158a7..6cc59f138095 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -219,6 +219,9 @@ struct 
_vcs_dpi_display_pipe_source_params_st { unsigned char xfc_enable; unsigned char xfc_slave; struct _vcs_dpi_display_xfc_params_st xfc_params; + //for vstartuplines calculation freesync + unsigned char v_total_min; + unsigned char v_total_max; }; struct writeback_st { int wb_src_height; @@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned char otg_inst; unsigned char odm_combine; unsigned char use_maximum_vstartup; + unsigned int vtotal_max; + unsigned int vtotal_min; }; struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c index c2028c4744a6..a610fae16280 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c @@ -84,10 +84,6 @@ bool dal_hw_factory_init( return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: - dal_hw_factory_dcn10_init(factory); - return true; -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) case DCN_VERSION_1_01: dal_hw_factory_dcn10_init(factory); return true; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c index 236ca28784a9..77615146b96e 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c @@ -84,11 +84,6 @@ bool dal_hw_translate_init( dal_hw_translate_dcn10_init(translate); return true; #endif -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) - case DCN_VERSION_1_01: - dal_hw_translate_dcn10_init(translate); - return true; -#endif default: BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 6f5ab05d6467..539d34d3439c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -123,6 +123,11 @@ struct resource_funcs { enum dc_status (*get_default_swizzle_mode)( struct dc_plane_state 
*plane_state); + struct stream_encoder *(*find_first_free_match_stream_enc_for_link)( + struct resource_context *res_ctx, + const struct resource_pool *pool, + struct dc_stream_state *stream); + }; struct audio_support{ @@ -212,6 +217,25 @@ struct plane_resource { struct dcn_fe_bandwidth bw; }; +union pipe_update_flags { + struct { + uint32_t enable : 1; + uint32_t disable : 1; + uint32_t odm : 1; + uint32_t global_sync : 1; + uint32_t opp_changed : 1; + uint32_t tg_changed : 1; + uint32_t mpcc : 1; + uint32_t dppclk : 1; + uint32_t hubp_interdependent : 1; + uint32_t hubp_rq_dlg_ttu : 1; + uint32_t gamut_remap : 1; + uint32_t scaler : 1; + uint32_t viewport : 1; + } bits; + uint32_t raw; +}; + struct pipe_ctx { struct dc_plane_state *plane_state; struct dc_stream_state *stream; @@ -234,6 +258,7 @@ struct pipe_ctx { struct _vcs_dpi_display_rq_regs_st rq_regs; struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param; #endif + union pipe_update_flags update_flags; }; struct resource_context { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 31bd6d5183ab..f3fd3f8cac26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -31,7 +31,7 @@ struct clk_mgr { struct dc_context *ctx; - const struct clk_mgr_funcs *funcs; + struct clk_mgr_funcs *funcs; struct dc_clocks clks; }; @@ -44,6 +44,12 @@ struct clk_mgr_funcs { int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); void (*init_clocks)(struct clk_mgr *clk_mgr); + + /* Returns actual clk that's set */ + int (*set_dispclk)(struct clk_mgr *clk_mgr, int requested_dispclk_khz); + int (*set_dprefclk)(struct clk_mgr *clk_mgr); }; + + #endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index c9d3e37e9531..ca162079a41b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -59,6 +59,7 @@ struct encoder_feature_support { uint32_t IS_TPS3_CAPABLE:1; uint32_t IS_TPS4_CAPABLE:1; uint32_t HDMI_6GB_EN:1; + uint32_t DP_IS_USB_C:1; } bits; uint32_t raw; } flags; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 49854eb73d1d..537563888f87 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -63,8 +63,6 @@ struct encoder_info_frame { struct dc_info_packet vsc; /* HDR Static MetaData */ struct dc_info_packet hdrsmd; - /* custom sdp message */ - struct dc_info_packet dpsdp; }; struct encoder_unblank_param { @@ -123,6 +121,11 @@ struct stream_encoder_funcs { struct stream_encoder *enc, const struct encoder_info_frame *info_frame); + void (*send_immediate_sdp_message)( + struct stream_encoder *enc, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void (*stop_dp_info_packets)( struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 067d53caf28a..0b8c6896581f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -70,14 +70,6 @@ enum crtc_state { CRTC_STATE_VACTIVE }; -struct _dlg_otg_param { - int vstartup_start; - int vupdate_offset; - int vupdate_width; - int vready_offset; - enum signal_type signal; -}; - struct vupdate_keepout_params { int start_offset; int end_offset; @@ -126,7 +118,6 @@ struct timing_generator { const struct timing_generator_funcs *funcs; struct dc_bios *bp; struct dc_context *ctx; - struct _dlg_otg_param dlg_otg_param; int inst; }; @@ -140,7 +131,13 @@ struct timing_generator_funcs { const struct dc_crtc_timing *timing); void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing 
*timing, - bool use_vbios); + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width, + const enum signal_type signal, + bool use_vbios + ); void (*setup_vertical_interrupt0)( struct timing_generator *optc, uint32_t start_line, @@ -210,7 +207,11 @@ struct timing_generator_funcs { bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width); - void (*program_global_sync)(struct timing_generator *tg); + void (*program_global_sync)(struct timing_generator *tg, + int vready_offset, + int vstartup_start, + int vupdate_offset, + int vupdate_width); void (*enable_optc_clock)(struct timing_generator *tg, bool enable); void (*program_stereo)(struct timing_generator *tg, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); @@ -237,6 +238,8 @@ struct timing_generator_funcs { bool (*get_crc)(struct timing_generator *tg, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); + void (*set_vtg_params)(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 33905468e2b9..eb1c12ed026a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -158,6 +158,11 @@ struct hw_sequencer_funcs { void (*update_info_frame)(struct pipe_ctx *pipe_ctx); + void (*send_immediate_sdp_message)( + struct pipe_ctx *pipe_ctx, + const uint8_t *custom_sdp_message, + unsigned int sdp_message_size); + void (*enable_stream)(struct pipe_ctx *pipe_ctx); void (*disable_stream)(struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h index 01bf01a34a08..c30437ae8395 100644 --- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h +++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h @@ -307,7 +307,8 @@ struct bp_encoder_cap_info { uint32_t 
DP_HBR2_EN:1; uint32_t DP_HBR3_EN:1; uint32_t HDMI_6GB_EN:1; - uint32_t RESERVED:30; + uint32_t DP_IS_USB_C:1; + uint32_t RESERVED:27; }; #endif /*__DAL_BIOS_PARSER_TYPES_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 4c8ce7938f01..63c3e77159d9 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -131,21 +131,18 @@ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ #define RAVEN_A0 0x01 #define RAVEN_B0 0x21 -#define PICASSO_A0 0x41 -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) /* DCN1_01 */ +#define PICASSO_A0 0x41 #define RAVEN2_A0 0x81 -#endif +#define RAVEN1_F0 0xF0 #define RAVEN_UNKNOWN 0xFF #define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define RAVEN1_F0 0xF0 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) #define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */ #define FAMILY_RV 142 /* DCN 1*/ diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h index f5bd869d4320..dabdbc0999d4 100644 --- a/drivers/gpu/drm/amd/display/include/dal_types.h +++ b/drivers/gpu/drm/amd/display/include/dal_types.h @@ -45,9 +45,7 @@ enum dce_version { DCE_VERSION_12_1, DCE_VERSION_MAX, DCN_VERSION_1_0, -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) DCN_VERSION_1_01, -#endif /* DCN1_01 */ DCN_VERSION_MAX }; diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h index 2b836e582c08..845fea8a387f 100644 --- a/drivers/gpu/drm/amd/display/include/set_mode_types.h +++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h @@ -84,7 +84,10 @@ union hdmi_info_packet { 
uint16_t bar_left; uint16_t bar_right; - uint8_t reserved[14]; + uint8_t F140_F143:4; + uint8_t ACE0_ACE3:4; + + uint8_t reserved[13]; } bits; struct info_packet_raw_data packet_raw_data; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index a1055413bade..8601d371776e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -240,16 +240,27 @@ struct dividers { struct fixed31_32 divider3; }; -static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) +enum gamma_type_index { + gamma_type_index_2_4, + gamma_type_index_2_2, + gamma_type_index_2_2_flat +}; + +static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type) { - static const int32_t numerator01[] = { 31308, 180000}; - static const int32_t numerator02[] = { 12920, 4500}; - static const int32_t numerator03[] = { 55, 99}; - static const int32_t numerator04[] = { 55, 99}; - static const int32_t numerator05[] = { 2400, 2200}; + static const int32_t numerator01[] = { 31308, 180000, 0}; + static const int32_t numerator02[] = { 12920, 4500, 0}; + static const int32_t numerator03[] = { 55, 99, 0}; + static const int32_t numerator04[] = { 55, 99, 0}; + static const int32_t numerator05[] = { 2400, 2200, 2200}; uint32_t i = 0; - uint32_t index = is_2_4 == true ? 
0:1; + uint32_t index = 0; + + if (type == gamma_type_index_2_2) + index = 1; + else if (type == gamma_type_index_2_2_flat) + index = 2; do { coefficients->a0[i] = dc_fixpt_from_fraction( @@ -697,7 +708,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq, static void build_regamma(struct pwl_float_data_ex *rgb_regamma, uint32_t hw_points_num, - const struct hw_x_point *coordinate_x, bool is_2_4) + const struct hw_x_point *coordinate_x, enum gamma_type_index type) { uint32_t i; @@ -705,7 +716,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma, struct pwl_float_data_ex *rgb = rgb_regamma; const struct hw_x_point *coord_x = coordinate_x; - build_coefficients(&coeff, is_2_4); + build_coefficients(&coeff, type); i = 0; @@ -892,13 +903,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, static void build_degamma(struct pwl_float_data_ex *curve, uint32_t hw_points_num, - const struct hw_x_point *coordinate_x, bool is_2_4) + const struct hw_x_point *coordinate_x, enum gamma_type_index type) { uint32_t i; struct gamma_coefficients coeff; uint32_t begin_index, end_index; - build_coefficients(&coeff, is_2_4); + build_coefficients(&coeff, type); i = 0; /* X points is 2^-25 to 2^7 @@ -1614,7 +1625,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, coordinates_x, output_tf->sdr_ref_white_level); } else if (tf == TRANSFER_FUNCTION_GAMMA22 && - fs_params != NULL) { + fs_params != NULL && fs_params->skip_tm == 0) { build_freesync_hdr(rgb_regamma, MAX_HW_POINTS, coordinates_x, @@ -1627,7 +1638,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, build_regamma(rgb_regamma, MAX_HW_POINTS, - coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 : + tf == TRANSFER_FUNCTION_GAMMA22 ? 
+ gamma_type_index_2_2_flat : gamma_type_index_2_2); } map_regamma_hw_to_x_user(ramp, coeff, rgb_user, coordinates_x, axis_x, rgb_regamma, @@ -1832,7 +1845,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, build_degamma(curve, MAX_HW_POINTS, coordinates_x, - tf == TRANSFER_FUNCTION_SRGB ? true : false); + tf == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ? + gamma_type_index_2_2_flat : gamma_type_index_2_2); else if (tf == TRANSFER_FUNCTION_LINEAR) { // just copy coordinates_x into curve i = 0; @@ -1932,7 +1947,10 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, build_regamma(rgb_regamma, MAX_HW_POINTS, - coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, + trans == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? + gamma_type_index_2_2_flat : gamma_type_index_2_2); for (i = 0; i <= MAX_HW_POINTS ; i++) { points->red[i] = rgb_regamma[i].r; points->green[i] = rgb_regamma[i].g; @@ -2002,7 +2020,8 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, kvfree(rgb_degamma); } else if (trans == TRANSFER_FUNCTION_SRGB || - trans == TRANSFER_FUNCTION_BT709) { + trans == TRANSFER_FUNCTION_BT709 || + trans == TRANSFER_FUNCTION_GAMMA22) { rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*rgb_degamma), GFP_KERNEL); @@ -2011,7 +2030,10 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, build_degamma(rgb_degamma, MAX_HW_POINTS, - coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false); + coordinates_x, + trans == TRANSFER_FUNCTION_SRGB ? + gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ? 
+ gamma_type_index_2_2_flat : gamma_type_index_2_2); for (i = 0; i <= MAX_HW_POINTS ; i++) { points->red[i] = rgb_degamma[i].r; points->green[i] = rgb_degamma[i].g; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index a6e164df090a..369953fafadf 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -79,6 +79,7 @@ struct freesync_hdr_tf_params { unsigned int max_content; // luminance in nits unsigned int min_display; // luminance in 1/10000 nits unsigned int max_display; // luminance in nits + unsigned int skip_tm; // skip tm }; void setup_x_points_distribution(void); diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index db06fab2ad5c..bc13c552797f 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream, if (stream->psr_version != 0) vscPacketRevision = 2; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + /* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */ + if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 && + stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED) vscPacketRevision = 5; /* VSC packet not needed based on the features diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h index a9575db8d7aa..6efcaa93e17b 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -30,4 +30,22 @@ #define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 +#define smnPerfMonCtlLo0 0x01d440UL +#define 
smnPerfMonCtlHi0 0x01d444UL +#define smnPerfMonCtlLo1 0x01d450UL +#define smnPerfMonCtlHi1 0x01d454UL +#define smnPerfMonCtlLo2 0x01d460UL +#define smnPerfMonCtlHi2 0x01d464UL +#define smnPerfMonCtlLo3 0x01d470UL +#define smnPerfMonCtlHi3 0x01d474UL + +#define smnPerfMonCtrLo0 0x01d448UL +#define smnPerfMonCtrHi0 0x01d44cUL +#define smnPerfMonCtrLo1 0x01d458UL +#define smnPerfMonCtrHi1 0x01d45cUL +#define smnPerfMonCtrLo2 0x01d468UL +#define smnPerfMonCtrHi2 0x01d46cUL +#define smnPerfMonCtrLo3 0x01d478UL +#define smnPerfMonCtrHi3 0x01d47cUL + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index 529b37db274c..f1d048e0ed2c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -829,6 +829,8 @@ #define mmTD_CNTL_BASE_IDX 0 #define mmTD_STATUS 0x0526 #define mmTD_STATUS_BASE_IDX 0 +#define mmTD_EDC_CNT 0x052e +#define mmTD_EDC_CNT_BASE_IDX 0 #define mmTD_DSM_CNTL 0x052f #define mmTD_DSM_CNTL_BASE_IDX 0 #define mmTD_DSM_CNTL2 0x0530 @@ -845,6 +847,8 @@ #define mmTA_STATUS_BASE_IDX 0 #define mmTA_SCRATCH 0x0564 #define mmTA_SCRATCH_BASE_IDX 0 +#define mmTA_EDC_CNT 0x0586 +#define mmTA_EDC_CNT_BASE_IDX 0 // addressBlock: gc_gdsdec @@ -1051,6 +1055,13 @@ #define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX 0 +// addressBlock: gc_ea_gceadec2 +// base address: 0x9c00 +#define mmGCEA_EDC_CNT 0x0706 +#define mmGCEA_EDC_CNT_BASE_IDX 0 +#define mmGCEA_EDC_CNT2 0x0707 +#define mmGCEA_EDC_CNT2_BASE_IDX 0 + // addressBlock: gc_rmi_rmidec // base address: 0x9e00 #define mmRMI_GENERAL_CNTL 0x0780 @@ -1709,6 +1720,8 @@ #define mmTC_CFG_L1_VOLATILE_BASE_IDX 0 #define mmTC_CFG_L2_VOLATILE 0x0b23 #define mmTC_CFG_L2_VOLATILE_BASE_IDX 0 +#define mmTCI_EDC_CNT 0x0b60 +#define mmTCI_EDC_CNT_BASE_IDX 0 #define mmTCI_STATUS 0x0b61 #define mmTCI_STATUS_BASE_IDX 0 #define mmTCI_CNTL_1 0x0b62 @@ -2594,6 +2607,24 @@ #define 
mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX 0 #define mmCP_RB_DOORBELL_CLEAR 0x1188 #define mmCP_RB_DOORBELL_CLEAR_BASE_IDX 0 +#define mmCPF_EDC_TAG_CNT 0x1189 +#define mmCPF_EDC_TAG_CNT_BASE_IDX 0 +#define mmCPF_EDC_ROQ_CNT 0x118a +#define mmCPF_EDC_ROQ_CNT_BASE_IDX 0 +#define mmCPG_EDC_TAG_CNT 0x118b +#define mmCPG_EDC_TAG_CNT_BASE_IDX 0 +#define mmCPG_EDC_DMA_CNT 0x118d +#define mmCPG_EDC_DMA_CNT_BASE_IDX 0 +#define mmCPC_EDC_SCRATCH_CNT 0x118e +#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX 0 +#define mmCPC_EDC_UCODE_CNT 0x118f +#define mmCPC_EDC_UCODE_CNT_BASE_IDX 0 +#define mmDC_EDC_STATE_CNT 0x1191 +#define mmDC_EDC_STATE_CNT_BASE_IDX 0 +#define mmDC_EDC_CSINVOC_CNT 0x1192 +#define mmDC_EDC_CSINVOC_CNT_BASE_IDX 0 +#define mmDC_EDC_RESTORE_CNT 0x1193 +#define mmDC_EDC_RESTORE_CNT_BASE_IDX 0 #define mmCP_GFX_MQD_CONTROL 0x11a0 #define mmCP_GFX_MQD_CONTROL_BASE_IDX 0 #define mmCP_GFX_MQD_BASE_ADDR 0x11a1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h index 8c75669eb500..9470ec5e0f42 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h @@ -54,5 +54,8 @@ #define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 #define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_6_1_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h index 5563f0715896..caf5ffdc130a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h @@ -51,4 +51,7 @@ #define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258 #define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_7_0_SMN_HEADER diff --git 
a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h index c1457d880c4d..4bcacf529852 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h +++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h @@ -50,4 +50,7 @@ #define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c #define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250 +#define smnPCIE_RX_NUM_NAK 0x11180038 +#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c + #endif // _nbio_7_4_0_SMN_HEADER diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h index 749eab94e335..699e658c3cec 100644 --- a/drivers/gpu/drm/amd/include/cik_structs.h +++ b/drivers/gpu/drm/amd/include/cik_structs.h @@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b897aca9b4c9..98b9533e672b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -174,6 +174,7 @@ struct tile_config { #define ALLOC_MEM_FLAGS_GTT (1 << 1) #define ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* * Allocation flags attributes/access options. 
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2b579ba9b685..9f661bf96ed0 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -109,8 +109,12 @@ enum amd_pp_sensors { AMDGPU_PP_SENSOR_UVD_DCLK, AMDGPU_PP_SENSOR_VCE_ECCLK, AMDGPU_PP_SENSOR_GPU_LOAD, + AMDGPU_PP_SENSOR_MEM_LOAD, AMDGPU_PP_SENSOR_GFX_MCLK, AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, + AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + AMDGPU_PP_SENSOR_MEM_TEMP, AMDGPU_PP_SENSOR_VCE_POWER, AMDGPU_PP_SENSOR_UVD_POWER, AMDGPU_PP_SENSOR_GPU_POWER, @@ -159,6 +163,13 @@ struct pp_states_info { uint32_t states[16]; }; +enum PP_HWMON_TEMP { + PP_TEMP_EDGE = 0, + PP_TEMP_JUNCTION, + PP_TEMP_MEM, + PP_TEMP_MAX +}; + #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index ceaf4932258d..8b383dbe1cda 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -151,8 +151,7 @@ struct v9_sdma_mqd { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h index 717fbae1d362..c17613287cd0 100644 --- a/drivers/gpu/drm/amd/include/vi_structs.h +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -151,8 +151,7 @@ struct vi_sdma_mqd { uint32_t reserved_123; uint32_t reserved_124; uint32_t reserved_125; - uint32_t reserved_126; - uint32_t reserved_127; + /* reserved_126,127: repurposed for driver-internal use */ uint32_t sdma_engine_id; uint32_t sdma_queue_id; }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c 
b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index eec329ab6037..3026c7e2d3ea 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -30,6 +30,36 @@ #include "atom.h" #include "amd_pcie.h" +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version) +{ + int ret = 0; + + if (!if_version && !smu_version) + return -EINVAL; + + if (if_version) { + ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); + if (ret) + return ret; + + ret = smu_read_smc_arg(smu, if_version); + if (ret) + return ret; + } + + if (smu_version) { + ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion); + if (ret) + return ret; + + ret = smu_read_smc_arg(smu, smu_version); + if (ret) + return ret; + } + + return ret; +} + int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, bool gate) { @@ -168,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf; int ret = 0; + if (!smu->pm_enabled) + return -EINVAL; if (header->usStructureSize != size) { pr_err("pp table size not matched !\n"); return -EIO; @@ -203,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu) int ret = 0; uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32]; + if (!smu->pm_enabled) + return ret; mutex_lock(&feature->mutex); bitmap_fill(feature->allowed, SMU_FEATURE_MAX); mutex_unlock(&feature->mutex); @@ -314,6 +348,7 @@ static int smu_early_init(void *handle) struct smu_context *smu = &adev->smu; smu->adev = adev; + smu->pm_enabled = !!amdgpu_dpm; mutex_init(&smu->mutex); return smu_set_funcs(adev); @@ -323,6 +358,9 @@ static int smu_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; + + if (!smu->pm_enabled) + return 0; mutex_lock(&smu->mutex); smu_handle_task(&adev->smu, smu->smu_dpm.dpm_level, @@ -406,9 +444,6 @@ static int smu_sw_init(void 
*handle) struct smu_context *smu = &adev->smu; int ret; - if (!is_support_sw_smu(adev)) - return -EINVAL; - smu->pool_size = adev->pm.smu_prv_buffer_size; smu->smu_feature.feature_num = SMU_FEATURE_MAX; mutex_init(&smu->smu_feature.mutex); @@ -460,9 +495,6 @@ static int smu_sw_fini(void *handle) struct smu_context *smu = &adev->smu; int ret; - if (!is_support_sw_smu(adev)) - return -EINVAL; - ret = smu_smc_table_sw_fini(smu); if (ret) { pr_err("Failed to sw fini smc table!\n"); @@ -612,10 +644,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, * check if the format_revision in vbios is up to pptable header * version, and the structure size is not 0. */ - ret = smu_get_clk_info_from_vbios(smu); - if (ret) - return ret; - ret = smu_check_pptable(smu); if (ret) return ret; @@ -716,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu, */ ret = smu_set_tool_table_location(smu); + if (!smu_is_dpm_running(smu)) + pr_info("dpm has been disabled\n"); + return ret; } @@ -788,9 +819,6 @@ static int smu_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if (!is_support_sw_smu(adev)) - return -EINVAL; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { ret = smu_load_microcode(smu); if (ret) @@ -831,7 +859,10 @@ static int smu_hw_init(void *handle) mutex_unlock(&smu->mutex); - adev->pm.dpm_enabled = true; + if (!smu->pm_enabled) + adev->pm.dpm_enabled = false; + else + adev->pm.dpm_enabled = true; pr_info("SMU is initialized successfully!\n"); @@ -849,9 +880,6 @@ static int smu_hw_fini(void *handle) struct smu_table_context *table_context = &smu->smu_table; int ret = 0; - if (!is_support_sw_smu(adev)) - return -EINVAL; - kfree(table_context->driver_pptable); table_context->driver_pptable = NULL; @@ -906,9 +934,6 @@ static int smu_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if 
(!is_support_sw_smu(adev)) - return -EINVAL; - ret = smu_system_features_control(smu, false); if (ret) return ret; @@ -924,9 +949,6 @@ static int smu_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; - if (!is_support_sw_smu(adev)) - return -EINVAL; - pr_info("SMU is resuming...\n"); mutex_lock(&smu->mutex); @@ -955,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu, int index = 0; int num_of_active_display = 0; - if (!is_support_sw_smu(smu->adev)) + if (!smu->pm_enabled || !is_support_sw_smu(smu->adev)) return -EINVAL; if (!display_config) @@ -1083,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle, struct smu_context *smu = (struct smu_context*)(handle); struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - if (!smu_dpm_ctx->dpm_context) + if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context) return -EINVAL; if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) { @@ -1126,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, long workload; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + if (!smu->pm_enabled) + return -EINVAL; if (!skip_display_settings) { ret = smu_display_config_changed(smu); if (ret) { @@ -1134,6 +1158,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu, } } + if (!smu->pm_enabled) + return -EINVAL; ret = smu_apply_clocks_adjust_rules(smu); if (ret) { pr_err("Failed to apply clocks adjust rules!"); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 70f7f47a2fcf..cc57fb953e62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -225,7 +225,16 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr) int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) { int ret = 0; - struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX}; + struct 
PP_TemperatureRange range = { + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX}; struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->hwmgr_func->get_thermal_temperature_range) @@ -239,6 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; + adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; + adev->pm.dpm.thermal.min_mem_temp = range.mem_min; + adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 048757e8f494..16591be8b0ca 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3532,9 +3532,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; return 0; case AMDGPU_PP_SENSOR_GPU_LOAD: + case AMDGPU_PP_SENSOR_MEM_LOAD: offset = data->soft_regs_start + smum_get_offsetof(hwmgr, SMU_SoftRegisters, - AverageGraphicsActivity); + (idx == AMDGPU_PP_SENSOR_GPU_LOAD) ? 
+ AverageGraphicsActivity: + AverageMemoryActivity); activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset); activity_percent += 0x80; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 384c37875cd0..1d9bb29adaef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -356,6 +356,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) struct vega10_hwmgr *data = hwmgr->backend; int i; uint32_t sub_vendor_id, hw_revision; + uint32_t top32, bottom32; struct amdgpu_device *adev = hwmgr->adev; vega10_initialize_power_tune_defaults(hwmgr); @@ -499,6 +500,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) (hw_revision == 0) && (sub_vendor_id != 0x1002)) data->smu_features[GNLD_PCC_LIMIT].supported = true; + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } #ifdef PPLIB_VEGA10_EVV_SUPPORT @@ -2267,8 +2276,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1; pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2; pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB; - pp_table->AcgAvfsGb.m1_shift = 0; - pp_table->AcgAvfsGb.m2_shift = 0; + pp_table->AcgAvfsGb.m1_shift = 24; + pp_table->AcgAvfsGb.m2_shift = 12; pp_table->AcgAvfsGb.b_shift = 0; } else { @@ -2364,6 +2373,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable) struct vega10_hwmgr *data = hwmgr->backend; if (data->smu_features[GNLD_AVFS].supported) { + /* Already enabled or disabled */ + if (!(enable ^ data->smu_features[GNLD_AVFS].enabled)) + return 0; + if (enable) { 
PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, true, @@ -2466,11 +2479,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) return; } } - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { - data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; - } } /** @@ -3683,6 +3691,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, vega10_update_avfs(hwmgr); + /* + * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC. + * That will help to keep AVFS disabled. + */ data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; return 0; @@ -3785,6 +3797,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM); + *((uint32_t *)value) = smum_get_argument(hwmgr) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 
0 : 1; *size = 4; @@ -4852,12 +4876,22 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v2_information *table_info = - (struct phm_ppt_v2_information *)hwmgr->pptable; + struct vega10_hwmgr *data = hwmgr->backend; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp * + thermal_data->max = pp_table->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; @@ -4988,13 +5022,70 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, return true; } +static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct pp_power_state *ps = hwmgr->request_ps; + struct vega10_power_state *vega10_ps; + struct vega10_single_dpm_table *gfx_dpm_table = + &data->dpm_table.gfx_table; + struct vega10_single_dpm_table *soc_dpm_table = + &data->dpm_table.soc_table; + struct vega10_single_dpm_table *mem_dpm_table = + &data->dpm_table.mem_table; + int max_level; + + if (!ps) + return; + + vega10_ps = cast_phw_vega10_power_state(&ps->hardware); + max_level = vega10_ps->performance_level_count - 1; + + if 
(vega10_ps->performance_levels[max_level].gfx_clock != + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].gfx_clock = + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].soc_clock != + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].soc_clock = + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].mem_clock != + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].mem_clock = + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; + + if (!hwmgr->ps) + return; + + ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1)); + vega10_ps = cast_phw_vega10_power_state(&ps->hardware); + max_level = vega10_ps->performance_level_count - 1; + + if (vega10_ps->performance_levels[max_level].gfx_clock != + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].gfx_clock = + gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].soc_clock != + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].soc_clock = + soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value; + + if (vega10_ps->performance_levels[max_level].mem_clock != + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value) + vega10_ps->performance_levels[max_level].mem_clock = + mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value; +} + static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type) { struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = hwmgr->pptable; struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; - struct 
vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table; + struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table; struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk = &data->odn_dpm_table.vdd_dep_on_socclk; @@ -5018,7 +5109,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, break; } if (j == od_vddc_lookup_table->count) { - od_vddc_lookup_table->entries[j-1].us_vdd = + j = od_vddc_lookup_table->count - 1; + od_vddc_lookup_table->entries[j].us_vdd = podn_vdd_dep->entries[i].vddc; data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; } @@ -5026,25 +5118,38 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, } dpm_table = &data->dpm_table.soc_table; for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && - dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { + if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd && + dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; - dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + for (; (i < dep_table->count) && + (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) { + podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk; + dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + } + break; + } else { + dpm_table->dpm_levels[i].value = dep_table->entries[i].clk; + podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc; + podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd; + podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk; } } if 
(podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < - podn_vdd_dep->entries[dep_table->count-1].clk) { + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; - dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk; + dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk; } if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < - podn_vdd_dep->entries[dep_table->count-1].vddInd) { + podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) { data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; - podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = + podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd; } } + vega10_odn_update_power_state(hwmgr); } static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, @@ -5079,6 +5184,11 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); vega10_odn_initial_default_setting(hwmgr); + vega10_odn_update_power_state(hwmgr); + /* force to update all clock tables */ + data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK | + DPMTABLE_UPDATE_MCLK | + DPMTABLE_UPDATE_SOCCLK; return 0; } else if (PP_OD_COMMIT_DPM_TABLE == type) { vega10_check_dpm_table_updated(hwmgr); @@ -5201,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { int vega10_hwmgr_init(struct 
pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; + hwmgr->hwmgr_func = &vega10_hwmgr_funcs; hwmgr->pptable_func = &vega10_pptable_funcs; + if (amdgpu_passthrough(adev)) + return vega10_baco_set_cap(hwmgr); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b6767d74dc85..83d22cdeaa29 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, return result; } + +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + const ATOM_Vega10_POWERPLAYTABLE *powerplay_table; + + powerplay_table = get_powerplay_table(hwmgr); + + PP_ASSERT_WITH_CODE((powerplay_table != NULL), + "Missing PowerPlay Table!", return -1); + + result = check_powerplay_tables(hwmgr, powerplay_table); + + PP_ASSERT_WITH_CODE((result == 0), + "check_powerplay_tables failed", return result); + + set_hw_cap( + hwmgr, + 0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO), + PHM_PlatformCaps_BACO); + return result; +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h index d83ed2af7aa3..da5fbec9b0cd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h @@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr); extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index, struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *, struct pp_power_state *, void *, uint32_t)); +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 707cd4b0357f..efb6d3762feb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr) static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t top32, bottom32; int i; data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr) ((data->registry_data.disallowed_features >> i) & 1) ? false : true; } + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) @@ -1237,21 +1247,39 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) return (mem_clk * 100); } +static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table) +{ + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + int ret = 0; + + if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) { + ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table, + TABLE_SMU_METRICS, true); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t)); + data->metrics_time = jiffies; + } else + memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t)); + + return ret; +} + static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { -#if 0 - uint32_t value; + SmuMetrics_t metrics_table; + int ret = 
0; - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_GetCurrPkgPwr), - "Failed to get current package power!", - return -EINVAL); + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; - value = smum_get_argument(hwmgr); - /* power value is an integer */ - *query = value << 8; -#endif - return 0; + *query = metrics_table.CurrSocketPower << 8; + + return ret; } static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq) @@ -1290,25 +1318,27 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f static int vega12_get_current_activity_percent( struct pp_hwmgr *hwmgr, + int idx, uint32_t *activity_percent) { + SmuMetrics_t metrics_table; int ret = 0; - uint32_t current_activity = 50; -#if 0 - ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0); - if (!ret) { - current_activity = smum_get_argument(hwmgr); - if (current_activity > 100) { - PP_ASSERT(false, - "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!"); - current_activity = 100; - } - } else - PP_ASSERT(false, - "[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!"); -#endif - *activity_percent = current_activity; + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + switch (idx) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + *activity_percent = metrics_table.AverageGfxActivity; + break; + case AMDGPU_PP_SENSOR_MEM_LOAD: + *activity_percent = metrics_table.AverageUclkActivity; + break; + default: + pr_err("Invalid index for retrieving clock activity\n"); + return -EINVAL; + } return ret; } @@ -1317,6 +1347,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); + SmuMetrics_t metrics_table; int ret = 0; switch (idx) { @@ -1331,7 +1362,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; 
break; case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value); + case AMDGPU_PP_SENSOR_MEM_LOAD: + ret = vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value); if (!ret) *size = 4; break; @@ -1339,6 +1371,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHotspot * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega12_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; @@ -1349,6 +1399,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, break; case AMDGPU_PP_SENSOR_GPU_POWER: ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); + if (!ret) + *size = 4; break; case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK: ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value); @@ -2526,12 +2578,23 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - thermal_data->max = pptable_information->us_software_shutdown_temp * + thermal_data->max = pp_table->TedgeLimit * + 
PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index b3e424d28994..73875399666a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -396,6 +396,9 @@ struct vega12_hwmgr { /* ---- Gfxoff ---- */ bool gfxoff_controlled_by_driver; + + unsigned long metrics_time; + SmuMetrics_t metrics_table; }; #define VEGA12_DPM2_NEAR_TDP_DEC 10 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9b9f87b84910..f27c6fbb192e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) if (hwmgr->smu_version < 0x282100) data->registry_data.disallowed_features |= FEATURE_ECC_MASK; + if (!(hwmgr->feature_mask & PP_PCIE_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK; + + if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK; + + if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK; + + if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK)) + data->registry_data.disallowed_features |= 
FEATURE_DPM_UCLK_MASK; + + if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK)) + data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK; + + if (!(hwmgr->feature_mask & PP_ULV_MASK)) + data->registry_data.disallowed_features |= FEATURE_ULV_MASK; + + if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK)) + data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK; + data->registry_data.od_state_in_dc_support = 0; data->registry_data.thermal_support = 1; data->registry_data.skip_baco_hardware = 0; @@ -303,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr) static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t top32, bottom32; int i; data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id = @@ -372,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr) ((data->registry_data.disallowed_features >> i) & 1) ? 
false : true; } + + /* Get the SN to turn into a Unique ID */ + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32); + top32 = smum_get_argument(hwmgr); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32); + bottom32 = smum_get_argument(hwmgr); + + adev->unique_id = ((uint64_t)bottom32 << 32) | top32; } static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) @@ -2094,6 +2125,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr, } static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, + int idx, uint32_t *activity_percent) { int ret = 0; @@ -2103,7 +2135,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr, if (ret) return ret; - *activity_percent = metrics_table.AverageGfxActivity; + switch (idx) { + case AMDGPU_PP_SENSOR_GPU_LOAD: + *activity_percent = metrics_table.AverageGfxActivity; + break; + case AMDGPU_PP_SENSOR_MEM_LOAD: + *activity_percent = metrics_table.AverageUclkActivity; + break; + default: + pr_err("Invalid index for retrieving clock activity\n"); + return -EINVAL; + } return ret; } @@ -2134,14 +2176,33 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_LOAD: - ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value); + case AMDGPU_PP_SENSOR_MEM_LOAD: + ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value); if (!ret) *size = 4; break; - case AMDGPU_PP_SENSOR_GPU_TEMP: + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr); *size = 4; break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = metrics_table.TemperatureEdge * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = vega20_get_metrics_table(hwmgr, &metrics_table); + if (ret) + return ret; + + *((uint32_t *)value) = 
metrics_table.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + *size = 4; + break; case AMDGPU_PP_SENSOR_UVD_POWER: *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; *size = 4; @@ -3974,12 +4035,23 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *thermal_data) { - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; + struct vega20_hwmgr *data = + (struct vega20_hwmgr *)(hwmgr->backend); + PPTable_t *pp_table = &(data->smc_state_table.pp_table); memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - thermal_data->max = pptable_information->us_software_shutdown_temp * + thermal_data->max = pp_table->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_crit_max = pp_table->ThbmLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index c8b168b3413b..3eb1de9ecf73 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -401,8 +401,12 @@ struct smu_context uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; + bool pm_enabled; uint32_t smc_if_version; + + unsigned long metrics_time; + void *metrics_table; }; struct pptable_funcs { @@ -458,6 +462,8 
@@ struct pptable_funcs { uint32_t *mclk_mask, uint32_t *soc_mask); int (*set_cpu_power_state)(struct smu_context *smu); + int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures); + int (*get_ppfeature_status)(struct smu_context *smu, char *buf); }; struct smu_funcs @@ -727,7 +733,10 @@ struct smu_funcs ((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0) #define smu_set_xgmi_pstate(smu, pstate) \ ((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0) - +#define smu_set_ppfeature_status(smu, ppfeatures) \ + ((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL) +#define smu_get_ppfeature_status(smu, buf) \ + ((smu)->ppt_funcs->get_ppfeature_status ? (smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL) extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table, uint16_t *size, uint8_t *frev, uint8_t *crev, @@ -767,4 +776,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b extern int smu_handle_task(struct smu_context *smu, enum amd_dpm_forced_level level, enum amd_pp_task task_id); +int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version); #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index a99b5cbb113e..a5f2227a3971 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -124,6 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock { struct PP_TemperatureRange { int min; int max; + int edge_emergency_max; + int hotspot_min; + int hotspot_crit_max; + int hotspot_emergency_max; + int mem_min; + int mem_crit_max; + int mem_emergency_max; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h index 201d2b6329ab..3e30768f9e1c 100644 --- 
a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -27,14 +27,18 @@ static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = { - {-273150, 99000}, - { 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; static const struct PP_TemperatureRange SMU7ThermalPolicy[] = { - {-273150, 99000}, - { 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; +#define CTF_OFFSET_EDGE 5 +#define CTF_OFFSET_HOTSPOT 5 +#define CTF_OFFSET_HBM 5 + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index aa8d81f4111e..02c965d64256 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -36,6 +36,9 @@ #define smnMP0_FW_INTF 0x30101c0 #define smnMP1_PUB_CTRL 0x3010b14 +#define TEMP_RANGE_MIN (0) +#define TEMP_RANGE_MAX (80 * 1000) + struct smu_11_0_max_sustainable_clocks { uint32_t display_clock; uint32_t phy_clock; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 82550a8a3a3f..c5288831aa15 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -41,6 +41,7 @@ enum SMU_MEMBER { HandshakeDisables = 0, VoltageChangeTimeout, AverageGraphicsActivity, + AverageMemoryActivity, PreVBlankGap, VBlankTimeout, UcodeLoadStatus, diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index 92903a4cc4d8..d2eeb6240484 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -223,20 +223,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu) static int 
smu_v11_0_check_fw_version(struct smu_context *smu) { - uint32_t smu_version = 0xff; + uint32_t if_version = 0xff, smu_version = 0xff; + uint16_t smu_major; + uint8_t smu_minor, smu_debug; int ret = 0; - ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion); + ret = smu_get_smc_version(smu, &if_version, &smu_version); if (ret) - goto err; + return ret; - ret = smu_read_smc_arg(smu, &smu_version); - if (ret) - goto err; + smu_major = (smu_version >> 16) & 0xffff; + smu_minor = (smu_version >> 8) & 0xff; + smu_debug = (smu_version >> 0) & 0xff; + + pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n", + if_version, smu_version, smu_major, smu_minor, smu_debug); - if (smu_version != smu->smc_if_version) + if (if_version != smu->smc_if_version) { + pr_err("SMU driver if version not matched\n"); ret = -EINVAL; -err: + } + return ret; } @@ -353,6 +360,8 @@ static int smu_v11_0_init_power(struct smu_context *smu) { struct smu_power_context *smu_power = &smu->smu_power; + if (!smu->pm_enabled) + return 0; if (smu_power->power_context || smu_power->power_context_size != 0) return -EINVAL; @@ -362,6 +371,13 @@ static int smu_v11_0_init_power(struct smu_context *smu) return -ENOMEM; smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context); + smu->metrics_time = 0; + smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL); + if (!smu->metrics_table) { + kfree(smu_power->power_context); + return -ENOMEM; + } + return 0; } @@ -369,10 +385,14 @@ static int smu_v11_0_fini_power(struct smu_context *smu) { struct smu_power_context *smu_power = &smu->smu_power; + if (!smu->pm_enabled) + return 0; if (!smu_power->power_context || smu_power->power_context_size == 0) return -EINVAL; + kfree(smu->metrics_table); kfree(smu_power->power_context); + smu->metrics_table = NULL; smu_power->power_context = NULL; smu_power->power_context_size = 0; @@ -634,6 +654,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu) { struct 
smu_table_context *table_context = &smu->smu_table; + if (!smu->pm_enabled) + return 0; if (!table_context) return -EINVAL; @@ -662,6 +684,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu) static int smu_v11_0_init_display(struct smu_context *smu) { int ret = 0; + + if (!smu->pm_enabled) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0); return ret; } @@ -671,6 +696,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32 uint32_t feature_low = 0, feature_high = 0; int ret = 0; + if (!smu->pm_enabled) + return ret; if (feature_id >= 0 && feature_id < 31) feature_low = (1 << feature_id); else if (feature_id > 31 && feature_id < 63) @@ -777,10 +804,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu, uint32_t feature_mask[2]; int ret = 0; - ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures : - SMU_MSG_DisableAllSmuFeatures)); - if (ret) - return ret; + if (smu->pm_enabled) { + ret = smu_send_smc_msg(smu, (en ? 
SMU_MSG_EnableAllSmuFeatures : + SMU_MSG_DisableAllSmuFeatures)); + if (ret) + return ret; + } + ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); if (ret) return ret; @@ -797,6 +827,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu) { int ret = 0; + if (!smu->pm_enabled) + return ret; if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT)) ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1); @@ -809,6 +841,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock, { int ret = 0; + if (!smu->pm_enabled) + return ret; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq, clock_select << 16); if (ret) { @@ -995,9 +1029,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_ static int smu_v11_0_get_thermal_range(struct smu_context *smu, struct PP_TemperatureRange *range) { + PPTable_t *pptable = smu->smu_table.driver_pptable; memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange)); - range->max = smu->smu_table.software_shutdown_temp * + range->max = pptable->TedgeLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_crit_max = pptable->ThotspotLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_crit_max = pptable->ThbmLimit * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)* PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; @@ -1062,9 +1107,20 @@ static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu) static int smu_v11_0_start_thermal_control(struct smu_context *smu) { int ret = 0; - struct PP_TemperatureRange range; + struct PP_TemperatureRange range = { + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + 
TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX}; struct amdgpu_device *adev = smu->adev; + if (!smu->pm_enabled) + return ret; smu_v11_0_get_thermal_range(smu, &range); if (smu->smu_table.thermal_controller_type) { @@ -1082,11 +1138,39 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; + adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; + adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; + adev->pm.dpm.thermal.min_mem_temp = range.mem_min; + adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; + + return ret; +} + +static int smu_v11_0_get_metrics_table(struct smu_context *smu, + SmuMetrics_t *metrics_table) +{ + int ret = 0; + + if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) { + ret = smu_update_table(smu, TABLE_SMU_METRICS, + (void *)metrics_table, false); + if (ret) { + pr_info("Failed to export SMU metrics table!\n"); + return ret; + } + memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t)); + smu->metrics_time = jiffies; + } else + memcpy(metrics_table, smu->metrics_table, sizeof(SmuMetrics_t)); return ret; } static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, + enum amd_pp_sensors sensor, uint32_t *value) { int ret = 0; @@ -1095,31 +1179,64 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu, if (!value) return -EINVAL; - ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + ret = smu_v11_0_get_metrics_table(smu, &metrics); if (ret) return ret; - *value = metrics.AverageGfxActivity; + switch (sensor) { + case 
AMDGPU_PP_SENSOR_GPU_LOAD: + *value = metrics.AverageGfxActivity; + break; + case AMDGPU_PP_SENSOR_MEM_LOAD: + *value = metrics.AverageUclkActivity; + break; + default: + pr_err("Invalid sensor for retrieving clock activity\n"); + return -EINVAL; + } return 0; } -static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value) +static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, + enum amd_pp_sensors sensor, + uint32_t *value) { struct amdgpu_device *adev = smu->adev; + SmuMetrics_t metrics; uint32_t temp = 0; + int ret = 0; if (!value) return -EINVAL; - temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); - temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> - CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; + ret = smu_v11_0_get_metrics_table(smu, &metrics); + if (ret) + return ret; - temp = temp & 0x1ff; - temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; + switch (sensor) { + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); + temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> + CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; + + temp = temp & 0x1ff; + temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES; - *value = temp; + *value = temp; + break; + case AMDGPU_PP_SENSOR_EDGE_TEMP: + *value = metrics.TemperatureEdge * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + case AMDGPU_PP_SENSOR_MEM_TEMP: + *value = metrics.TemperatureHBM * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + break; + default: + pr_err("Invalid sensor for retrieving temp\n"); + return -EINVAL; + } return 0; } @@ -1132,7 +1249,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value) if (!value) return -EINVAL; - ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false); + ret = smu_v11_0_get_metrics_table(smu, &metrics); if (ret) return ret; @@ -1174,7 +1291,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu, int ret = 0; switch (sensor) { case AMDGPU_PP_SENSOR_GPU_LOAD: + 
case AMDGPU_PP_SENSOR_MEM_LOAD: ret = smu_v11_0_get_current_activity_percent(smu, + sensor, (uint32_t *)data); *size = 4; break; @@ -1186,8 +1305,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu, ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data); *size = 4; break; - case AMDGPU_PP_SENSOR_GPU_TEMP: - ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data); + case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: + case AMDGPU_PP_SENSOR_EDGE_TEMP: + case AMDGPU_PP_SENSOR_MEM_TEMP: + ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data); *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: @@ -1235,6 +1356,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu, PPCLK_e clk_select = 0; uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; + if (!smu->pm_enabled) + return -EINVAL; if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) { switch (clk_type) { case amd_pp_dcef_clock: @@ -1518,7 +1641,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf) "PD_Data_error_rate_coeff"}; int result = 0; - if (!buf) + if (!smu->pm_enabled || !buf) return -EINVAL; size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n", @@ -1605,6 +1728,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input smu->power_profile_mode = input[size]; + if (!smu->pm_enabled) + return ret; if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { pr_err("Invalid power profile mode %d\n", smu->power_profile_mode); return -EINVAL; @@ -1710,24 +1835,24 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu, static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable) { - if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) + if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) return 0; - if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) + if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) return 0; - return 
smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); + return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); } static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable) { - if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT)) + if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT)) return 0; - if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT)) + if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT)) return 0; - return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable); + return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable); } static int smu_v11_0_get_current_rpm(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 669bd0c2a16c..9ef57fcf7e78 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2254,6 +2254,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU7_SoftRegisters, AverageGraphicsA); + case AverageMemoryActivity: + return offsetof(SMU7_SoftRegisters, AverageMemoryA); case PreVBlankGap: return offsetof(SMU7_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index bc8375cbf297..0ce85b73338e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU73_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return 
offsetof(SMU73_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 375ccf6ff5f2..f24f13d77808 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -2219,6 +2219,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU71_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU71_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2d4cfe14f72e..0d8958e71b94 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -2313,6 +2313,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU74_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU74_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 3ed6c5f1e5cf..060c0f7f5238 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -2611,6 +2611,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU72_SoftRegisters, 
AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU72_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU72_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index ddb801517667..1eaf0fa28ef7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr) priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01; priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t); + /* allocate space for SMU_METRICS table */ + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, + sizeof(SmuMetrics_t), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &priv->smu_tables.entry[TABLE_SMU_METRICS].handle, + &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, + &priv->smu_tables.entry[TABLE_SMU_METRICS].table); + if (ret) + goto err4; + + priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01; + priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t); + return 0; +err4: + amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, + &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, + &priv->smu_tables.entry[TABLE_OVERDRIVE].table); err3: amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle, &priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr, @@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr) amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle, &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr, &priv->smu_tables.entry[TABLE_OVERDRIVE].table); + amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle, + &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr, + &priv->smu_tables.entry[TABLE_SMU_METRICS].table); kfree(hwmgr->smu_backend); hwmgr->smu_backend = 
NULL; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 1e69300f6175..d499204b2184 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout); case AverageGraphicsActivity: return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity); + case AverageMemoryActivity: + return offsetof(SMU75_SoftRegisters, AverageMemoryActivity); case PreVBlankGap: return offsetof(SMU75_SoftRegisters, PreVBlankGap); case VBlankTimeout: diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 8fafcbdb1dfd..4aa8f5a69c4c 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, return ret; } +static int vega20_get_enabled_smc_features(struct smu_context *smu, + uint64_t *features_enabled) +{ + uint32_t feature_mask[2] = {0, 0}; + int ret = 0; + + ret = smu_feature_get_enabled_mask(smu, feature_mask, 2); + if (ret) + return ret; + + *features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) | + (((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK)); + + return ret; +} + +static int vega20_enable_smc_features(struct smu_context *smu, + bool enable, uint64_t feature_mask) +{ + uint32_t smu_features_low, smu_features_high; + int ret = 0; + + smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT); + smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT); + + if (enable) { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow, + smu_features_low); + if (ret) + 
return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh, + smu_features_high); + if (ret) + return ret; + } else { + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow, + smu_features_low); + if (ret) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh, + smu_features_high); + if (ret) + return ret; + } + + return 0; + +} + +static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf) +{ + static const char *ppfeature_name[] = { + "DPM_PREFETCHER", + "GFXCLK_DPM", + "UCLK_DPM", + "SOCCLK_DPM", + "UVD_DPM", + "VCE_DPM", + "ULV", + "MP0CLK_DPM", + "LINK_DPM", + "DCEFCLK_DPM", + "GFXCLK_DS", + "SOCCLK_DS", + "LCLK_DS", + "PPT", + "TDC", + "THERMAL", + "GFX_PER_CU_CG", + "RM", + "DCEFCLK_DS", + "ACDC", + "VR0HOT", + "VR1HOT", + "FW_CTF", + "LED_DISPLAY", + "FAN_CONTROL", + "GFX_EDC", + "GFXOFF", + "CG", + "FCLK_DPM", + "FCLK_DS", + "MP1CLK_DS", + "MP0CLK_DS", + "XGMI", + "ECC"}; + static const char *output_title[] = { + "FEATURES", + "BITMASK", + "ENABLEMENT"}; + uint64_t features_enabled; + int i; + int ret = 0; + int size = 0; + + ret = vega20_get_enabled_smc_features(smu, &features_enabled); + if (ret) + return ret; + + size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled); + size += sprintf(buf + size, "%-19s %-22s %s\n", + output_title[0], + output_title[1], + output_title[2]); + for (i = 0; i < GNLD_FEATURES_MAX; i++) { + size += sprintf(buf + size, "%-19s 0x%016llx %6s\n", + ppfeature_name[i], + 1ULL << i, + (features_enabled & (1ULL << i)) ? 
"Y" : "N"); + } + + return size; +} + +static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks) +{ + uint64_t features_enabled; + uint64_t features_to_enable; + uint64_t features_to_disable; + int ret = 0; + + if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX)) + return -EINVAL; + + ret = vega20_get_enabled_smc_features(smu, &features_enabled); + if (ret) + return ret; + + features_to_disable = + features_enabled & ~new_ppfeature_masks; + features_to_enable = + ~features_enabled & new_ppfeature_masks; + + pr_debug("features_to_disable 0x%llx\n", features_to_disable); + pr_debug("features_to_enable 0x%llx\n", features_to_enable); + + if (features_to_disable) { + ret = vega20_enable_smc_features(smu, false, features_to_disable); + if (ret) + return ret; + } + + if (features_to_enable) { + ret = vega20_enable_smc_features(smu, true, features_to_enable); + if (ret) + return ret; + } + + return 0; +} + static const struct pptable_funcs vega20_ppt_funcs = { .alloc_dpm_context = vega20_allocate_dpm_context, .store_powerplay_table = vega20_store_powerplay_table, @@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = { .unforce_dpm_levels = vega20_unforce_dpm_levels, .upload_dpm_level = vega20_upload_dpm_level, .get_profiling_clk_mask = vega20_get_profiling_clk_mask, + .set_ppfeature_status = vega20_set_ppfeature_status, + .get_ppfeature_status = vega20_get_ppfeature_status, }; void vega20_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h index 5a0d2af63173..87f3a8303645 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h @@ -36,6 +36,50 @@ #define AVFS_CURVE 0 #define OD8_HOTCURVE_TEMPERATURE 85 +#define SMU_FEATURES_LOW_MASK 0x00000000FFFFFFFF +#define SMU_FEATURES_LOW_SHIFT 0 +#define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000 +#define SMU_FEATURES_HIGH_SHIFT 32 + +enum { 
+ GNLD_DPM_PREFETCHER = 0, + GNLD_DPM_GFXCLK, + GNLD_DPM_UCLK, + GNLD_DPM_SOCCLK, + GNLD_DPM_UVD, + GNLD_DPM_VCE, + GNLD_ULV, + GNLD_DPM_MP0CLK, + GNLD_DPM_LINK, + GNLD_DPM_DCEFCLK, + GNLD_DS_GFXCLK, + GNLD_DS_SOCCLK, + GNLD_DS_LCLK, + GNLD_PPT, + GNLD_TDC, + GNLD_THERMAL, + GNLD_GFX_PER_CU_CG, + GNLD_RM, + GNLD_DS_DCEFCLK, + GNLD_ACDC, + GNLD_VR0HOT, + GNLD_VR1HOT, + GNLD_FW_CTF, + GNLD_LED_DISPLAY, + GNLD_FAN_CONTROL, + GNLD_DIDT, + GNLD_GFXOFF, + GNLD_CG, + GNLD_DPM_FCLK, + GNLD_DS_FCLK, + GNLD_DS_MP1CLK, + GNLD_DS_MP0CLK, + GNLD_XGMI, + GNLD_ECC, + + GNLD_FEATURES_MAX +}; + struct vega20_dpm_level { bool enabled; uint32_t value; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index dc067ed0b72d..070d1bc7e725 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args { }; /* For kfd_ioctl_create_queue_args.queue_type. */ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0 -#define KFD_IOC_QUEUE_TYPE_SDMA 1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2 +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 +#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -338,6 +339,7 @@ struct kfd_ioctl_acquire_vm_args { #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) #define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* Allocation flags: attributes/access options */ #define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) @@ -408,6 +410,21 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @gpu_id: device identifier + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first 
GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 gpu_id; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -426,6 +443,13 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +/* Register offset inside the remapped mmio page + */ +enum kfd_mmio_remap { + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -520,7 +544,10 @@ struct kfd_ioctl_import_dmabuf_args { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif |