diff options
author | Dave Airlie <airlied@redhat.com> | 2019-05-31 09:33:29 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2019-05-31 10:04:39 +1000 |
commit | 91c1ead6aee22d4595f50ba66070b94a4a8f84a9 (patch) | |
tree | 066ffa1c352b6257dd37cda6a1df53159e133f2e /drivers/gpu/drm/amd/amdgpu | |
parent | 14ee642c2ab0a3d8a1ded11fade692d8b77172b9 (diff) | |
parent | cf401e2856b27b2deeada498eab864e2a50cf219 (diff) |
Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3:
- Add new thermal sensors for vega asics
- Various RAS fixes
- Add sysfs interface for memory interface utilization
- Use HMM rather than mmu notifier for user pages
- Expose xgmi topology via kfd
- SR-IOV fixes
- Fixes for manual driver reload
- Add unique identifier for vega asics
- Clean up user fence handling with UVD/VCE/VCN blocks
- Convert DC to use core bpc attribute rather than a custom one
- Add GWS support for KFD
- Vega powerplay improvements
- Add CRC support for DCE 12
- SR-IOV support for new security policy
- Various cleanups
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
73 files changed, 3119 insertions, 1111 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 844f0a162981..a04f2fc7bf37 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - select MMU_NOTIFIER + depends on ARCH_HAS_HMM + select HMM_MIRROR help - This option selects CONFIG_MMU_NOTIFIER if it isn't already - selected to enabled full userptr support. + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it + isn't already selected to enabled full userptr support. config DRM_AMDGPU_GART_DEBUGFS bool "Allow GART access through debugfs" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index fdd0ca4b0f0b..57ce44cc3226 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ @@ -173,7 +173,7 @@ endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o include $(FULL_AMD_PATH)/powerplay/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 14398f55f602..58f8f132904d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,6 @@ extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; extern int amdgpu_pcie_gen2; extern int amdgpu_msi; -extern int amdgpu_lockup_timeout; extern int amdgpu_dpm; extern int amdgpu_fw_load_type; extern int amdgpu_aspm; @@ -211,6 +210,7 @@ struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; struct amdgpu_atif; +struct kfd_vm_fault_info; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -415,6 +415,7 @@ struct amdgpu_fpriv { }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev); int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); @@ -558,6 +559,8 @@ struct amdgpu_asic_funcs { uint64_t *count1); /* do we need to reset the asic at init time (e.g., kexec) */ bool (*need_reset_on_init)(struct amdgpu_device *adev); + /* PCIe replay counter */ + uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); }; /* @@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg { u32 ref_and_mask_sdma1; }; +struct amdgpu_mmio_remap { + u32 reg_offset; + resource_size_t bus_addr; +}; + struct amdgpu_nbio_funcs { const struct nbio_hdp_flush_reg *hdp_flush_reg; u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); @@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs { void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*detect_hw_virt)(struct amdgpu_device *adev); + void (*remap_hdp_registers)(struct amdgpu_device *adev); }; struct amdgpu_df_funcs { @@ -680,6 +689,12 @@ struct amdgpu_df_funcs { u32 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); + int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, + int is_enable); + int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, + int is_disable); + void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, + uint64_t *count); }; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { @@ -764,6 +779,7 @@ struct amdgpu_device { void __iomem *rmmio; /* protects concurrent MM_INDEX/DATA based register access */ spinlock_t mmio_idx_lock; + struct amdgpu_mmio_remap rmmio_remap; /* protects concurrent SMC based register access */ spinlock_t smc_idx_lock; amdgpu_rreg_t smc_rreg; @@ -936,6 +952,13 @@ struct amdgpu_device { struct work_struct xgmi_reset_work; bool in_baco_reset; + + long gfx_timeout; + long sdma_timeout; + long video_timeout; + long compute_timeout; + + uint64_t unique_id; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) +#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); bool amdgpu_device_is_px(struct drm_device *dev); +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); + /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) void amdgpu_register_atpx_handler(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index aeead072fa79..4af3989e4a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -25,8 +25,10 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_dma_buf.h" #include <linux/module.h> #include <linux/dma-buf.h> +#include "amdgpu_xgmi.h" static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) }; /* this is going to have a few of the MSBs set that we need to - * clear */ + * clear + */ bitmap_complement(gpu_resources.queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); @@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.queue_bitmap); /* According to linux/bitmap.h we shouldn't use bitmap_clear if - * nbits is not compile time constant */ + * nbits is not compile time constant + */ last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; @@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, + void **mem_obj) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; + int r; + + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = 1; + bp.domain = AMDGPU_GEM_DOMAIN_GWS; + bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &bo); + if (r) { + dev_err(adev->dev, + "failed to allocate gws BO for amdkfd (%d)\n", r); + return r; + } + + *mem_obj = bo; + return 0; +} + +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj) +{ + struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj; + + amdgpu_bo_unref(&bo); +} + uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { @@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) return adev->gmc.xgmi.hive_id; } +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) +{ + struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; + struct amdgpu_device *adev = (struct amdgpu_device *)dst; + int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); + + if (ret < 0) { + DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", + adev->gmc.xgmi.physical_node_id, + peer_adev->gmc.xgmi.physical_node_id, ret); + ret = 0; + } + return (uint8_t)ret; +} + +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rmmio_remap.bus_addr; +} + +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gds.gws_size; +} int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4e37fa7e85b1..f968bf147c5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,7 +61,6 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; - struct page **user_pages; struct amdgpu_sync sync; @@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr, bool mqd_gfx9); void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj); +void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj); +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem); +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, @@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index fa09e11a600c..c6abcf72e822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index fec3a6aa1de6..4e8b4e949926 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; - pr_debug("kfd: sdma base address: 0x%x\n", retval); + pr_debug("sdma base address: 0x%x\n", retval); return retval; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ef3d93b995b2..d5af41143d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, lock_srbm(kgd, 0, 0, 0, vmid); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ unlock_srbm(kgd); @@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, ((mec << 5) | (pipe << 3) | queue_id | 0x80)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); } /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ @@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32(reg, mqd_hqd[reg - hqd_base]); + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); if (wptr) { /* Don't read wptr with get_user because the user @@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); release_queue(kgd); @@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, break; } - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - /* Use legacy mode tlb invalidation. - * - * Currently on Raven the code below is broken for anything but - * legacy mode due to a MMHUB power gating problem. A workaround - * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ - * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack - * bit. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate both GC and - * MMHUB. - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); -} - -static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type) { signed long r; uint32_t seq; @@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) PACKET3_INVALIDATE_TLBS_DST_SEL(1) | PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | PACKET3_INVALIDATE_TLBS_PASID(pasid) | - PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); @@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; int vmid; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + uint32_t flush_type = 0; if (adev->in_gpu_reset) return -EIO; + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; if (ring->sched.ready) - return invalidate_tlbs_with_kiq(adev, pasid); + return invalidate_tlbs_with_kiq(adev, pasid, flush_type); for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) @@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - write_vmid_invalidate_request(kgd, vmid); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + flush_type); break; } } @@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) return 0; } - write_vmid_invalidate_request(kgd, vmid); + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); return 0; } @@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, mutex_lock(&adev->grbm_idx_mutex); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, @@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a6e5184d436c..87177ed37dd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -30,6 +30,7 @@ #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_dma_buf.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to * a HW bug. @@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, mutex_unlock(&process_info->lock); } +static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *bo_list_entry; + + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); +} + /* Initializes user pages. It registers the MMU notifier and validates * the userptr BO in the GTT domain. * @@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - /* If no restore worker is running concurrently, user_pages - * should not be allocated - */ - WARN(mem->user_pages, "Leaking user_pages array"); - - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", __func__); - ret = -ENOMEM; - goto unregister_out; - } - - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto free_out; + goto unregister_out; } - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); - ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - if (ret) - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); -free_out: - kvfree(mem->user_pages); - mem->user_pages = NULL; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; - ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; + ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); + if (ret) + goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, &vm->process_info->eviction_fence->base, true); amdgpu_bo_unreserve(vm->root.base.bo); @@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, return 0; +reserve_shared_fail: wait_pd_fail: validate_pd_fail: amdgpu_bo_unreserve(vm->root.base.bo); @@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; - } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL | + ALLOC_MEM_FLAGS_MMIO_REMAP)) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; bo_type = ttm_bo_type_sg; @@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (user_addr) { ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); + if (ret) goto allocate_init_user_pages_failed; - } } if (offset) @@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( return 0; allocate_init_user_pages_failed: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); - /* Free user pages if necessary */ - if (mem->user_pages) { - pr_debug("%s: Freeing user_pages array\n", __func__); - if (mem->user_pages[0]) - release_pages(mem->user_pages, - mem->bo->tbo.ttm->num_pages); - kvfree(mem->user_pages); - } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); - /* If the SG is not NULL, it's one we created for a doorbell - * BO. We need to free it. + /* If the SG is not NULL, it's one we created for a doorbell or mmio + * remap BO. We need to free it. */ if (mem->bo->tbo.sg) { sg_free_table(mem->bo->tbo.sg); @@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); if (ret) { - pr_err("Failed to map radeon bo to gpuvm\n"); + pr_err("Failed to map bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } @@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; - if (!mem->user_pages) { - mem->user_pages = - kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!mem->user_pages) { - pr_err("%s: Failed to allocate pages array\n", - __func__); - return -ENOMEM; - } - } else if (mem->user_pages[0]) { - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); - } - /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - mem->user_pages); + bo->tbo.ttm->pages); if (ret) { - mem->user_pages[0] = NULL; + bo->tbo.ttm->pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. */ } - - /* Mark the BO as valid unless it was invalidated - * again concurrently - */ - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) - return -EAGAIN; } return 0; } +/* Remove invalid userptr BOs from hmm track list + * + * Stop HMM track the userptr update + */ +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } +} + /* Validate invalid userptr BOs * * Validates BOs on the userptr_inval_list, and moves them back to the @@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - return -ENOMEM; + ret = -ENOMEM; + goto out_no_mem; } INIT_LIST_HEAD(&resv_list); @@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out; + goto out_free; amdgpu_sync_create(&sync); @@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Copy pages array and validate the BO if we got user pages */ - if (mem->user_pages[0]) { - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - mem->user_pages); + /* Validate the BO if we got user pages */ + if (bo->tbo.ttm->pages[0]) { amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - /* Validate succeeded, now the BO owns the pages, free - * our copy of the pointer array. Put this BO back on - * the userptr_valid_list. If we need to revalidate - * it, we need to start from scratch. - */ - kvfree(mem->user_pages); - mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); + /* Stop HMM track the userptr update. We dont check the return + * value for concurrent CPU page table update because we will + * reschedule the restore worker if process_info->evicted_bos + * is updated. + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1897,8 +1876,9 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out: +out_free: kfree(pd_bo_list_entries); +out_no_mem: return ret; } @@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * hanging. No point trying again. */ } + unlock_out: + untrack_invalid_user_pages(process_info); mutex_unlock(&process_info->lock); mmput(mm); put_task_struct(usertask); @@ -2130,3 +2112,88 @@ ttm_reserve_fail: kfree(pd_bo_list); return ret; } + +int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem) +{ + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws; + int ret; + + if (!info || !gws) + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -EINVAL; + + mutex_init(&(*mem)->lock); + (*mem)->bo = amdgpu_bo_ref(gws_bo); + (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS; + (*mem)->process_info = process_info; + add_kgd_mem_to_kfd_bo_list(*mem, process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + + /* Validate gws bo the first time it is added to process */ + mutex_lock(&(*mem)->process_info->lock); + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + goto bo_reservation_failure; + } + + ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true); + if (ret) { + pr_err("GWS BO validate failed %d\n", ret); + goto bo_validation_failure; + } + /* GWS resource is shared b/t amdgpu and amdkfd + * Add process eviction fence to bo so they can + * evict each other. + */ + amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); + amdgpu_bo_unreserve(gws_bo); + mutex_unlock(&(*mem)->process_info->lock); + + return ret; + +bo_validation_failure: + amdgpu_bo_unreserve(gws_bo); +bo_reservation_failure: + mutex_unlock(&(*mem)->process_info->lock); + amdgpu_sync_free(&(*mem)->sync); + remove_kgd_mem_from_kfd_bo_list(*mem, process_info); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&(*mem)->lock); + kfree(*mem); + *mem = NULL; + return ret; +} + +int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) +{ + int ret; + struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info; + struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; + struct amdgpu_bo *gws_bo = kgd_mem->bo; + + /* Remove BO from process's validate list so restore worker won't touch + * it anymore + */ + remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info); + + ret = amdgpu_bo_reserve(gws_bo, false); + if (unlikely(ret)) { + pr_err("Reserve gws bo failed %d\n", ret); + //TODO add BO back to validate_list? + return ret; + } + amdgpu_amdkfd_remove_eviction_fence(gws_bo, + process_info->eviction_fence); + amdgpu_bo_unreserve(gws_bo); + amdgpu_sync_free(&kgd_mem->sync); + amdgpu_bo_unref(&gws_bo); + mutex_destroy(&kgd_mem->lock); + kfree(mem); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5c79da8e1150..d497467b7fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = adev->gds.gds_gfx_bo; - list->gws_obj = adev->gds.gws_gfx_bo; - list->oa_obj = adev->gds.oa_gfx_bo; + list->gds_obj = NULL; + list->gws_obj = NULL; + list->oa_obj = NULL; array = amdgpu_bo_list_array_entry(list, 0); memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 7c5f5d1601e6..a130e766cbdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - int user_invalidated; + bool user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6239b6be6f..d72cc583ebd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; - p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; - /* Check if we have user pages and nobody bound the BO already */ - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - lobj->user_pages) { + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && + lobj->user_invalidated && lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; - unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - while (1) { - struct list_head need_pages; - - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto error_free_pages; - } - - INIT_LIST_HEAD(&need_pages); - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, - &e->user_invalidated) && e->user_pages) { - - /* We acquired a page array, but somebody - * invalidated it. Free it and try again - */ - release_pages(e->user_pages, - bo->tbo.ttm->num_pages); - kvfree(e->user_pages); - e->user_pages = NULL; - } - - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && - !e->user_pages) { - list_del(&e->tv.head); - list_add(&e->tv.head, &need_pages); - - amdgpu_bo_unreserve(bo); - } + /* Get userptr backing pages. If pages are updated after registered + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do + * amdgpu_ttm_backend_bind() to flush and invalidate new pages + */ + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + bool userpage_invalidated = false; + int i; + + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + DRM_ERROR("calloc failure\n"); + return -ENOMEM; } - if (list_empty(&need_pages)) - break; - - /* Unreserve everything again. */ - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - - /* We tried too many times, just abort */ - if (!--tries) { - r = -EDEADLK; - DRM_ERROR("deadlock in %s\n", __func__); - goto error_free_pages; + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + if (r) { + kvfree(e->user_pages); + e->user_pages = NULL; + return r; } - /* Fill the page arrays for all userptrs. */ - list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->tv.bo->ttm; - - e->user_pages = kvmalloc_array(ttm->num_pages, - sizeof(struct page*), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - r = -ENOMEM; - DRM_ERROR("calloc failure in %s\n", __func__); - goto error_free_pages; - } - - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); - if (r) { - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); - kvfree(e->user_pages); - e->user_pages = NULL; - goto error_free_pages; + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { + userpage_invalidated = true; + break; } } + e->user_invalidated = userpage_invalidated; + } - /* And try again. */ - list_splice(&need_pages, &p->validated); + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto out; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - -error_free_pages: - - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - if (!e->user_pages) - continue; - - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); - kvfree(e->user_pages); - } - +out: return r; } @@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - /* UVD & VCE fw doesn't support user fences */ + /* MM engine doesn't support user fences */ ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ( - ring->funcs->type == AMDGPU_RING_TYPE_UVD || - ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); @@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; - int r; job = p->job; @@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock */ + /* No memory allocation is allowed while holding the mn lock. + * p->mn is hold until amdgpu_cs_submit is finished and fence is added + * to BOs. + */ amdgpu_mn_lock(p->mn); + + /* If userptr are invalidated after amdgpu_cs_parser_bos(), return + * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. + */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { - r = -ERESTARTSYS; - goto error_abort; - } + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } + if (r) { + r = -EAGAIN; + goto error_abort; } job->owner = p->filp; @@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9f282e971197..0ffa6733f2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +/** + * DOC: pcie_replay_count + * + * The amdgpu driver provides a sysfs API for reporting the total number + * of PCIe replays (NAKs) + * The file pcie_replay_count is used for this and returns the total + * number of replays as a sum of the NAKs generated and NAKs received + */ + +static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); + + return snprintf(buf, PAGE_SIZE, "%llu\n", cnt); +} + +static DEVICE_ATTR(pcie_replay_count, S_IRUGO, + amdgpu_device_get_pcie_replay_count, NULL); + static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /** @@ -910,8 +932,10 @@ def_value: * Validates certain module parameters and updates * the associated values used by the driver (all asics). */ -static void amdgpu_device_check_arguments(struct amdgpu_device *adev) +static int amdgpu_device_check_arguments(struct amdgpu_device *adev) { + int ret = 0; + if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); @@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vram_page_split = 1024; } - if (amdgpu_lockup_timeout == 0) { - dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); - amdgpu_lockup_timeout = 10000; + ret = amdgpu_device_get_job_timeout_settings(adev); + if (ret) { + dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); + return ret; } adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + + return ret; } /** @@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; + + /* query the reg access mode at the very beginning */ + amdgpu_virt_init_reg_access_mode(adev); } adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + /* Read BIOS */ + if (!amdgpu_get_bios(adev)) + return -EINVAL; + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) if (adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { r = adev->ip_blocks[i].version->funcs->hw_init(adev); if (r) { @@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->lock_reset); mutex_init(&adev->virt.dpm_mutex); - amdgpu_device_check_arguments(adev); + r = amdgpu_device_check_arguments(adev); + if (r) + return r; spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); @@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto fence_driver_init; } - /* Read BIOS */ - if (!amdgpu_get_bios(adev)) { - r = -EINVAL; - goto failed; - } - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - goto failed; - } - /* detect if we are with an SRIOV vbios */ amdgpu_device_detect_sriov_bios(adev); @@ -2672,6 +2703,10 @@ fence_driver_init: if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); + r = amdgpu_ucode_sysfs_init(adev); + if (r) + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2712,7 +2747,13 @@ fence_driver_init: } /* must succeed. */ - amdgpu_ras_post_init(adev); + amdgpu_ras_resume(adev); + + r = device_create_file(adev->dev, &dev_attr_pcie_replay_count); + if (r) { + dev_err(adev->dev, "Could not create pcie_replay_count"); + return r; + } return 0; @@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); amdgpu_debugfs_regs_cleanup(adev); + device_remove_file(adev->dev, &dev_attr_pcie_replay_count); + amdgpu_ucode_sysfs_fini(adev); } @@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_amdkfd_suspend(adev); + amdgpu_ras_suspend(adev); + r = amdgpu_device_ip_suspend_phase1(adev); /* evict vram memory */ @@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_kms_helper_poll_enable(dev); + amdgpu_ras_resume(adev); + /* * Most of the connector probing functions try to acquire runtime pm * refs to ensure that the GPU is powered on when connector polling is @@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); + r = amdgpu_device_ip_late_init(tmp_adev); + if (r) + goto out; + + /* must succeed. */ + amdgpu_ras_resume(tmp_adev); + /* Update PSP FW topology after reset */ if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) r = amdgpu_xgmi_update_topology(hive, tmp_adev); @@ -3695,43 +3749,6 @@ skip_hw_reset: return r; } -static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev, - enum pci_bus_speed *speed, - enum pcie_link_width *width) -{ - struct pci_dev *pdev = adev->pdev; - enum pci_bus_speed cur_speed; - enum pcie_link_width cur_width; - u32 ret = 1; - - *speed = PCI_SPEED_UNKNOWN; - *width = PCIE_LNK_WIDTH_UNKNOWN; - - while (pdev) { - cur_speed = pcie_get_speed_cap(pdev); - cur_width = pcie_get_width_cap(pdev); - ret = pcie_bandwidth_available(adev->pdev, NULL, - NULL, &cur_width); - if (!ret) - cur_width = PCIE_LNK_WIDTH_RESRV; - - if (cur_speed != PCI_SPEED_UNKNOWN) { - if (*speed == PCI_SPEED_UNKNOWN) - *speed = cur_speed; - else if (cur_speed < *speed) - *speed = cur_speed; - } - - if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) { - if (*width == PCIE_LNK_WIDTH_UNKNOWN) - *width = cur_width; - else if (cur_width < *width) - *width = cur_width; - } - pdev = pci_upstream_bridge(pdev); - } -} - /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap, - &platform_link_width); + pcie_bandwidth_available(adev->pdev, NULL, + &platform_speed_cap, &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b083b219b1a9..30e6ad8a90bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) amdgpu_dither_enum_list, sz); if (amdgpu_device_has_dc_support(adev)) { - adev->mode_info.max_bpc_property = - drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); - if (!adev->mode_info.max_bpc_property) - return -ENOMEM; adev->mode_info.abm_level_property = drm_property_create_range(adev->ddev, 0, "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index a38e0fb4a6fe..4711cf1b5bd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Advanced Micro Devices, Inc. + * Copyright 2019 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) * Returns: * 0 on success or a negative error code on failure. */ -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -137,57 +138,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } -/** - * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table - * implementation - * @dev: DRM device - * @attach: DMA-buf attachment - * @sg: Scatter/gather table - * - * Imports shared DMA buffer memory exported by another device. - * - * Returns: - * A new GEM BO of the given DRM device, representing the memory - * described by the given DMA-buf attachment and scatter/gather table. - */ -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) -{ - struct reservation_object *resv = attach->dmabuf->resv; - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_bo *bo; - struct amdgpu_bo_param bp; - int ret; - - memset(&bp, 0, sizeof(bp)); - bp.size = attach->dmabuf->size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_CPU; - bp.flags = 0; - bp.type = ttm_bo_type_sg; - bp.resv = resv; - ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, &bp, &bo); - if (ret) - goto error; - - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; - bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; - bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) - bo->prime_shared_count = 1; - - ww_mutex_unlock(&resv->lock); - return &bo->gem_base; - -error: - ww_mutex_unlock(&resv->lock); - return ERR_PTR(ret); -} - static int __reservation_object_make_exclusive(struct reservation_object *obj) { @@ -231,7 +181,7 @@ err_fences_put: } /** - * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * @@ -242,8 +192,8 @@ err_fences_put: * Returns: * 0 on success or a negative error code on failure. */ -static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -291,15 +241,15 @@ error_detach: } /** - * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * * This is called when a shared DMA buffer no longer needs to be accessible by * another device. For now, simply unpins the buffer from GTT. */ -static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -334,7 +284,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) } /** - * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation * @dma_buf: Shared DMA buffer * @direction: Direction of DMA transfer * @@ -345,8 +295,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) * Returns: * 0 on success or a negative error code on failure. */ -static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, - enum dma_data_direction direction) +static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -374,12 +324,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, } const struct dma_buf_ops amdgpu_dmabuf_ops = { - .attach = amdgpu_gem_map_attach, - .detach = amdgpu_gem_map_detach, + .attach = amdgpu_dma_buf_map_attach, + .detach = amdgpu_dma_buf_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, - .begin_cpu_access = amdgpu_gem_begin_cpu_access, + .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, @@ -418,6 +368,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, } /** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Imports shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM BO of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. + */ +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg) +{ + struct reservation_object *resv = attach->dmabuf->resv; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; + int ret; + + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; + ww_mutex_lock(&resv->lock, NULL); + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; + + ww_mutex_unlock(&resv->lock); + return &bo->gem_base; + +error: + ww_mutex_unlock(&resv->lock); + return ERR_PTR(ret); +} + +/** * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation * @dev: DRM device * @dma_buf: Shared DMA buffer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h new file mode 100644 index 000000000000..c7056cbe8685 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -0,0 +1,46 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_DMA_BUF_H__ +#define __AMDGPU_DMA_BUF_H__ + +#include <drm/drm_gem.h> + +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dca35407879d..521dbd0d9af8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* edge max emergency(shutdown) temp */ + int max_edge_emergency_temp; + /* hotspot low temperature threshold */ + int min_hotspot_temp; + /* hotspot high temperature critical threshold */ + int max_hotspot_crit_temp; + /* hotspot max emergency(shutdown) temp */ + int max_hotspot_emergency_temp; + /* memory low temperature threshold */ + int min_mem_temp; + /* memory high temperature critical threshold */ + int max_mem_crit_temp; + /* memory max emergency(shutdown) temp */ + int max_mem_emergency_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1e2cc9d68a05..1f38d6fc1fe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,7 +36,7 @@ #include "amdgpu.h" #include "amdgpu_irq.h" -#include "amdgpu_gem.h" +#include "amdgpu_dma_buf.h" #include "amdgpu_amdkfd.h" @@ -81,6 +81,8 @@ #define KMS_DRIVER_MINOR 32 #define KMS_DRIVER_PATCHLEVEL 0 +#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 + int amdgpu_vram_limit = 0; int amdgpu_vis_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ @@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 10000; +char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH]; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); /** - * DOC: lockup_timeout (int) - * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. - * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. - */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); -module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); + * DOC: lockup_timeout (string) + * Set GPU scheduler timeout value in ms. + * + * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or + * multiple values specified. 0 and negative values are invalidated. They will be adjusted + * to default timeout. + * - With one value specified, the setting will apply to all non-compute jobs. + * - With multiple values specified, the first one will be for GFX. The second one is for Compute. + * And the third and fourth ones are for SDMA and Video. + * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) + * jobs is 10000. And there is no timeout enforced on compute jobs. + */ +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), " + "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); +module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) @@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry, int halt_if_hws_hang; module_param(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); + +/** + * DOC: hws_gws_support(bool) + * Whether HWS support gws barriers. Default value: false (not supported) + * This will be replaced with a MEC firmware version check once firmware + * is ready + */ +bool hws_gws_support; +module_param(hws_gws_support, bool, 0444); +MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); #endif /** @@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } +int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) +{ + char *input = amdgpu_lockup_timeout; + char *timeout_setting = NULL; + int index = 0; + long timeout; + int ret = 0; + + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. + */ + adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000; + adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + ret = kstrtol(timeout_setting, 0, &timeout); + if (ret) + return ret; + + /* Invalidate 0 and negative values */ + if (timeout <= 0) { + index++; + continue; + } + + switch (index++) { + case 0: + adev->gfx_timeout = timeout; + break; + case 1: + adev->compute_timeout = timeout; + break; + case 2: + adev->sdma_timeout = timeout; + break; + case 3: + adev->video_timeout = timeout; + break; + default: + break; + } + } + /* + * There is only one value specified and + * it should apply to all non-compute jobs. + */ + if (index == 1) + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; + } + + return ret; +} + static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, @@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 4dee2326b29c..3a483f7e89c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { + struct amdgpu_device *adev = ring->adev; long timeout; int r; + if (!adev) + return -EINVAL; + /* Check that num_hw_submission is a power of two */ if ((num_hw_submission & (num_hw_submission - 1)) != 0) return -EINVAL; @@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - /* for non-sriov case, no timeout enforce on compute ring */ - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) - && !amdgpu_sriov_vf(ring->adev)) - timeout = MAX_SCHEDULE_TIMEOUT; - else - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + /* + * For non-sriov case, no timeout enforce + * on compute ring by default. Unless user + * specifies a timeout for compute ring. + * + * For sriov case, always use the timeout + * as gfx ring + */ + if (!amdgpu_sriov_vf(ring->adev)) + timeout = adev->compute_timeout; + else + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index f89f5734d985..dad2186f4ed5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -27,26 +27,11 @@ struct amdgpu_ring; struct amdgpu_bo; -struct amdgpu_gds_asic_info { - uint32_t total_size; - uint32_t gfx_partition_size; - uint32_t cs_partition_size; -}; - struct amdgpu_gds { - struct amdgpu_gds_asic_info mem; - struct amdgpu_gds_asic_info gws; - struct amdgpu_gds_asic_info oa; + uint32_t gds_size; + uint32_t gws_size; + uint32_t oa_size; uint32_t gds_compute_max_wave_id; - - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. - * Such resource is shared between all gfx clients. - * TODO: move this operation to user space - * */ - struct amdgpu_bo* gds_gfx_bo; - struct amdgpu_bo* gws_gfx_bo; - struct amdgpu_bo* oa_gfx_bo; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d4fcf5475464..7b840367004c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto free_pages; + goto user_pages_done; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto free_pages; + goto user_pages_done; } r = drm_gem_handle_create(filp, gobj, &handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put_unlocked(gobj); if (r) - return r; + goto user_pages_done; args->handle = handle; - return 0; -free_pages: - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); +user_pages_done: + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index f1ddfc50bcc7..b8ba6e27c61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, - int flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - -extern const struct dma_buf_ops amdgpu_dmabuf_ops; /* * GEM objects. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0a17fb1af204..7ab1241bd9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_device_should_recover_gpu(ring->adev)) amdgpu_device_gpu_recover(ring->adev, job); + else + drm_sched_suspend_timeout(&ring->sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b17d0545728e..edb675103bd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; - gds_info.gds_total_size = adev->gds.mem.total_size; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size; + gds_info.compute_partition_size = adev->gds.gds_size; + gds_info.gds_total_size = adev->gds.gds_size; + gds_info.gws_per_compute_partition = adev->gds.gws_size; + gds_info.oa_per_compute_partition = adev->gds.oa_size; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 58ed401c5996..41ccee49a224 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,7 +45,7 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <linux/mmu_notifier.h> +#include <linux/hmm.h> #include <linux/interval_tree.h> #include <drm/drmP.h> #include <drm/drm.h> @@ -58,14 +58,12 @@ * * @adev: amdgpu device pointer * @mm: process address space - * @mn: MMU notifier structure * @type: type of MMU notifier * @work: destruction work item * @node: hash table node to find structure by adev and mn * @lock: rw semaphore protecting the notifier nodes * @objects: interval tree containing amdgpu_mn_nodes - * @read_lock: mutex for recursive locking of @lock - * @recursion: depth of recursion + * @mirror: HMM mirror function support * * Data for each amdgpu device and process address space. */ @@ -73,7 +71,6 @@ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; - struct mmu_notifier mn; enum amdgpu_mn_type type; /* only used on destruction */ @@ -85,8 +82,9 @@ struct amdgpu_mn { /* objects protected by lock */ struct rw_semaphore lock; struct rb_root_cached objects; - struct mutex read_lock; - atomic_t recursion; + + /* HMM mirror */ + struct hmm_mirror mirror; }; /** @@ -103,7 +101,7 @@ struct amdgpu_mn_node { }; /** - * amdgpu_mn_destroy - destroy the MMU notifier + * amdgpu_mn_destroy - destroy the HMM mirror * * @work: previously sheduled work item * @@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work) } up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + + hmm_mirror_unregister(&amn->mirror); kfree(amn); } /** - * amdgpu_mn_release - callback to notify about mm destruction + * amdgpu_hmm_mirror_release - callback to notify about mm destruction * - * @mn: our notifier - * @mm: the mm this callback is about + * @mirror: the HMM mirror (mm) this callback is about * - * Shedule a work item to lazy destroy our notifier. + * Shedule a work item to lazy destroy HMM mirror. */ -static void amdgpu_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); INIT_WORK(&amn->work, amdgpu_mn_destroy); schedule_work(&amn->work); } - /** * amdgpu_mn_lock - take the write side lock for this notifier * @@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) { if (blockable) - mutex_lock(&amn->read_lock); - else if (!mutex_trylock(&amn->read_lock)) + down_read(&amn->lock); + else if (!down_read_trylock(&amn->lock)) return -EAGAIN; - if (atomic_inc_return(&amn->recursion) == 1) - down_read_non_owner(&amn->lock); - mutex_unlock(&amn->read_lock); - return 0; } @@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable) */ static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&amn->recursion) == 0) - up_read_non_owner(&amn->lock); + up_read(&amn->lock); } /** @@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); - - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } /** - * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change + * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change * - * @mn: our notifier - * @range: mmu notifier context + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; /* TODO we should be able to split locking for interval tree and * amdgpu_mn_invalidate_node */ - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); - amdgpu_mn_invalidate_node(node, range->start, end); + amdgpu_mn_invalidate_node(node, start, end); } + amdgpu_mn_read_unlock(amn); + return 0; } /** - * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @mirror: the hmm_mirror (mm) is about to update + * @update: the update start, end address * * We temporarily evict all BOs between start and end. This * necessitates evicting all user-mode queues of the process. The BOs * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, + const struct hmm_update *update) { - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror); + unsigned long start = update->start; + unsigned long end = update->end; + bool blockable = update->blockable; struct interval_tree_node *it; - unsigned long end; /* notification is exclusive, but interval is inclusive */ - end = range->end - 1; + end -= 1; - if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range))) + if (amdgpu_mn_read_lock(amn, blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, range->start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; - if (!mmu_notifier_range_blockable(range)) { + if (!blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, range->start, end); + it = interval_tree_iter_next(it, start, end); list_for_each_entry(bo, &node->bos, mn_list) { struct kgd_mem *mem = bo->kfd_bo; if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - range->start, - end)) - amdgpu_amdkfd_evict_userptr(mem, range->mm); + start, end)) + amdgpu_amdkfd_evict_userptr(mem, amn->mm); } } + amdgpu_mn_read_unlock(amn); + return 0; } -/** - * amdgpu_mn_invalidate_range_end - callback to notify about mm change - * - * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range - * - * Release the lock again to allow new command submissions. +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. */ -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - - amdgpu_mn_read_unlock(amn); -} +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) -static const struct mmu_notifier_ops amdgpu_mn_ops[] = { +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { [AMDGPU_MN_TYPE_GFX] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx, + .release = amdgpu_hmm_mirror_release }, [AMDGPU_MN_TYPE_HSA] = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa, + .release = amdgpu_hmm_mirror_release }, }; -/* Low bits of any reasonable mm pointer will be unused due to struct - * alignment. Use these bits to make a unique key from the mm pointer - * and notifier type. - */ -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) - /** - * amdgpu_mn_get - create notifier context + * amdgpu_mn_get - create HMM mirror context * * @adev: amdgpu device pointer * @type: type of MMU notifier context * - * Creates a notifier context for current->mm. + * Creates a HMM mirror context for current->mm. */ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type) @@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, amn->mm = mm; init_rwsem(&amn->lock); amn->type = type; - amn->mn.ops = &amdgpu_mn_ops[type]; amn->objects = RB_ROOT_CACHED; - mutex_init(&amn->read_lock); - atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&amn->mn, mm); + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type]; + r = hmm_mirror_register(&amn->mirror, mm); if (r) goto free_amn; @@ -432,7 +404,7 @@ free_amn: * @bo: amdgpu buffer object * @addr: userptr addr we should monitor * - * Registers an MMU notifier for the given BO at the specified address. + * Registers an HMM mirror for the given BO at the specified address. * Returns 0 on success, -ERRNO if anything goes wrong. */ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) } /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates * * @bo: amdgpu buffer object * - * Remove any registration of MMU notifier updates from the buffer object. + * Remove any registration of HMM mirror updates from the buffer object. */ void amdgpu_mn_unregister(struct amdgpu_bo *bo) { @@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { + (1 << 0), /* HMM_PFN_VALID */ + (1 << 1), /* HMM_PFN_WRITE */ + 0 /* HMM_PFN_DEVICE_PRIVATE */ +}; + +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ + 0, /* HMM_PFN_NONE */ + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ +}; + +void amdgpu_hmm_init_range(struct hmm_range *range) +{ + if (range) { + range->flags = hmm_range_flags; + range->values = hmm_range_values; + range->pfn_shift = PAGE_SHIFT; + range->pfns = NULL; + INIT_LIST_HEAD(&range->list); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index eb0f432f78fe..f5b67c63ed6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,22 +25,24 @@ #define __AMDGPU_MN_H__ /* - * MMU Notifier + * HMM mirror */ struct amdgpu_mn; +struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, AMDGPU_MN_TYPE_HSA, }; -#if defined(CONFIG_MMU_NOTIFIER) +#if defined(CONFIG_HMM_MIRROR) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); +void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} @@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, } static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { + DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, " + "add CONFIG_ZONE_DEVICE=y in config file to fix this\n"); return -ENODEV; } static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e9e3db778c6..eb9975f4decb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -331,8 +331,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* maximum number of bits per channel for monitor color */ - struct drm_property *max_bpc_property; /* Adaptive Backlight Modulation (power feature) */ struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 34471dbaa872..a73e1903d29b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = { {0, NULL}, }; +static const struct hwmon_temp_label { + enum PP_HWMON_TEMP channel; + const char *label; +} temp_label[] = { + {PP_TEMP_EDGE, "edge"}, + {PP_TEMP_JUNCTION, "junction"}, + {PP_TEMP_MEM, "mem"}, +}; + void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) { if (adev->pm.dpm_enabled) { @@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev, pr_debug("featuremask = 0x%llx\n", featuremask); - if (adev->powerplay.pp_funcs->set_ppfeature_status) { + if (is_support_sw_smu(adev)) { + ret = smu_set_ppfeature_status(&adev->smu, featuremask); + if (ret) + return -EINVAL; + } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask); if (ret) return -EINVAL; @@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - if (adev->powerplay.pp_funcs->get_ppfeature_status) + if (is_support_sw_smu(adev)) { + return smu_get_ppfeature_status(&adev->smu, buf); + } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); return snprintf(buf, PAGE_SIZE, "\n"); @@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, } /** + * DOC: mem_busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the VRAM + * is as a percentage. The file mem_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. + */ +static ssize_t amdgpu_get_memory_busy_percent(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int r, value, size = sizeof(value); + + /* read the IP busy sensor */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, + (void *)&value, &size); + + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", value); +} + +/** * DOC: pcie_bw * * The amdgpu driver provides a sysfs API for estimating how much data @@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, count0, count1, pcie_get_mps(adev->pdev)); } +/** + * DOC: unique_id + * + * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU + * The file unique_id is used for this. + * This will provide a Unique ID that will persist from machine to machine + * + * NOTE: This will only work for GFX9 and newer. This file will be absent + * on unsupported ASICs (GFX8 and older) + */ +static ssize_t amdgpu_get_unique_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->unique_id) + return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); + + return 0; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, amdgpu_set_pp_od_clk_voltage); static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, amdgpu_get_busy_percent, NULL); +static DEVICE_ATTR(mem_busy_percent, S_IRUGO, + amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, amdgpu_get_ppfeature_status, amdgpu_set_ppfeature_status); +static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; + int channel = to_sensor_dev_attr(attr)->index; int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ @@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* get the temperature */ - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, - (void *)&temp, &size); - if (r) - return r; + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + /* get current junction temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_EDGE: + /* get current edge temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + case PP_TEMP_MEM: + /* get current memory temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, + (void *)&temp, &size); + if (r) + return r; + break; + } return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_hotspot_temp; + else + temp = adev->pm.dpm.thermal.max_hotspot_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int hyst = to_sensor_dev_attr(attr)->index; + int temp; + + if (hyst) + temp = adev->pm.dpm.thermal.min_mem_temp; + else + temp = adev->pm.dpm.thermal.max_mem_crit_temp; + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + +static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int channel = to_sensor_dev_attr(attr)->index; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label); +} + +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * * hwmon interfaces for GPU temperature: * - * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius + * - temp2_input and temp3_input are supported on SOC15 dGPUs only + * + * - temp[1-3]_label: temperature channel label + * - temp2_label and temp3_label are supported on SOC15 dGPUs only + * + * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius + * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only * - * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * - * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only * * hwmon interfaces for GPU voltage: * @@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * */ -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); +static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE); +static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION); +static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_freq2_label.dev_attr.attr)) return 0; + /* only SOC15 dGPUs support hotspot and mem temperatures */ + if (((adev->flags & AMD_IS_APU) || + adev->asic_type < CHIP_VEGA10) && + (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || + attr == &sensor_dev_attr_temp3_input.dev_attr.attr || + attr == &sensor_dev_attr_temp2_label.dev_attr.attr || + attr == &sensor_dev_attr_temp3_label.dev_attr.attr)) + return 0; + return effective_mode; } @@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) "gpu_busy_level\n"); return ret; } + /* APU does not have its own dedicated memory */ + if (!(adev->flags & AMD_IS_APU)) { + ret = device_create_file(adev->dev, + &dev_attr_mem_busy_percent); + if (ret) { + DRM_ERROR("failed to create device file " + "mem_busy_percent\n"); + return ret; + } + } /* PCIe Perf counters won't work on APU nodes */ if (!(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, &dev_attr_pcie_bw); @@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + if (adev->unique_id) + ret = device_create_file(adev->dev, &dev_attr_unique_id); + if (ret) { + DRM_ERROR("failed to create device file unique_id\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); if (!(adev->flags & AMD_IS_APU)) + device_remove_file(adev->dev, &dev_attr_mem_busy_percent); + if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); + if (adev->unique_id) + device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_ppfeatures); @@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a /* GPU Load */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); + /* MEM Load */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size)) + seq_printf(m, "MEM Load: %u %%\n", value); + seq_printf(m, "\n"); /* SMC feature mask */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 86cc24b2e0aa..af9835c8395d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp) return ret; } +static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t id, uint32_t value) +{ + cmd->cmd_id = GFX_CMD_ID_PROG_REG; + cmd->cmd.cmd_setup_reg_prog.reg_value = value; + cmd->cmd.cmd_setup_reg_prog.reg_id = id; +} + +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value) +{ + struct psp_gfx_cmd_resp *cmd = NULL; + int ret = 0; + + if (reg >= PSP_REG_LAST) + return -EINVAL; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_reg_prog_cmd_buf(cmd, reg, value); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + kfree(cmd); + return ret; +} + static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, uint32_t xgmi_ta_size, uint32_t shared_size) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index cde113f07c96..cf49539b0b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -62,6 +62,14 @@ struct psp_ring uint32_t ring_size; }; +/* More registers may will be supported */ +enum psp_reg_prog_id { + PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */ + PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */ + PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */ + PSP_REG_LAST +}; + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); @@ -95,12 +103,26 @@ struct psp_funcs int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); }; +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct psp_xgmi_topology_info { + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; +}; + struct psp_xgmi_context { uint8_t initialized; uint32_t session_id; struct amdgpu_bo *xgmi_shared_bo; uint64_t xgmi_shared_mc_addr; void *xgmi_shared_buf; + struct psp_xgmi_topology_info top_info; }; struct psp_ras_context { @@ -181,18 +203,6 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 -struct psp_xgmi_node_info { - uint64_t node_id; - uint8_t num_hops; - uint8_t is_sharing_enabled; - enum ta_xgmi_assigned_sdma_engine sdma_engine; -}; - -struct psp_xgmi_topology_info { - uint32_t num_nodes; - struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; -}; #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) @@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; - +int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, + uint32_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 22bd21efe6b1..7c8a4aedf07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -90,6 +90,12 @@ struct ras_manager { struct ras_err_data err_data; }; +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + const char *ras_error_string[] = { "none", "parity", @@ -118,7 +124,8 @@ const char *ras_block_string[] = { #define ras_err_str(i) (ras_error_string[ffs(i)]) #define ras_block_str(i) (ras_block_string[i]) -#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1 +#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) static void amdgpu_ras_self_test(struct amdgpu_device *adev) @@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return 0; } -/* - * DOC: ras debugfs control interface +/** + * DOC: AMDGPU RAS debugfs control interface * * It accepts struct ras_debug_if who has two members. * @@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, enable ? "enable":"disable", ras_block_str(head->block), ret); + if (ret == TA_RAS_STATUS__RESET_NEEDED) + return -EAGAIN; return -EINVAL; } @@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, return -EINVAL; if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) { - /* If ras is enabled by vbios, we set up ras object first in - * both case. For enable, that is all what we need do. For - * disable, we need perform a ras TA disable cmd after that. - */ - ret = __amdgpu_ras_feature_enable(adev, head, 1); - if (ret) - return ret; + if (enable) { + /* There is no harm to issue a ras TA cmd regardless of + * the currecnt ras state. + * If current state == target state, it will do nothing + * But sometimes it requests driver to reset and repost + * with error code -EAGAIN. + */ + ret = amdgpu_ras_feature_enable(adev, head, 1); + /* With old ras TA, we might fail to enable ras. + * Log it and just setup the object. + * TODO need remove this WA in the future. + */ + if (ret == -EINVAL) { + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (!ret) + DRM_INFO("RAS INFO: %s setup object\n", + ras_block_str(head->block)); + } + } else { + /* setup the object then issue a ras TA disable cmd.*/ + ret = __amdgpu_ras_feature_enable(adev, head, 1); + if (ret) + return ret; - if (!enable) ret = amdgpu_ras_feature_enable(adev, head, 0); + } } else ret = amdgpu_ras_feature_enable(adev, head, enable); @@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* sysfs begin */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count); + +static char *amdgpu_ras_badpage_flags_str(unsigned int flags) +{ + switch (flags) { + case 0: + return "R"; + case 1: + return "P"; + case 2: + default: + return "F"; + }; +} + +/* + * DOC: ras sysfs gpu_vram_bad_pages interface + * + * It allows user to read the bad pages of vram on the gpu through + * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages + * + * It outputs multiple lines, and each line stands for one gpu page. + * + * The format of one line is below, + * gpu pfn : gpu page size : flags + * + * gpu pfn and gpu page size are printed in hex format. + * flags can be one of below character, + * R: reserved, this gpu page is reserved and not able to use. + * P: pending for reserve, this gpu page is marked as bad, will be reserved + * in next window of page_reserve. + * F: unable to reserve. this gpu page can't be reserved due to some reasons. + * + * examples: + * 0x00000001 : 0x00001000 : R + * 0x00000002 : 0x00001000 : P + */ + +static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, + struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t ppos, size_t count) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, badpages_attr); + struct amdgpu_device *adev = con->adev; + const unsigned int element_size = + sizeof("0xabcdabcd : 0x12345678 : R\n") - 1; + unsigned int start = div64_ul(ppos + element_size - 1, element_size); + unsigned int end = div64_ul(ppos + count - 1, element_size); + ssize_t s = 0; + struct ras_badpage *bps = NULL; + unsigned int bps_count = 0; + + memset(buf, 0, count); + + if (amdgpu_ras_badpages_read(adev, &bps, &bps_count)) + return 0; + + for (; start < end && start < bps_count; start++) + s += scnprintf(&buf[s], element_size + 1, + "0x%08x : 0x%08x : %1s\n", + bps[start].bp, + bps[start].size, + amdgpu_ras_badpage_flags_str(bps[start].flags)); + + kfree(bps); + + return s; +} + static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, struct device_attribute *attr, char *buf) { @@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; con->features_attr = (struct device_attribute) { @@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) }, .show = amdgpu_ras_sysfs_features_read, }; + + con->badpages_attr = (struct bin_attribute) { + .attr = { + .name = "gpu_vram_bad_pages", + .mode = S_IRUGO, + }, + .size = 0, + .private = NULL, + .read = amdgpu_ras_sysfs_badpages_read, + }; + sysfs_attr_init(attrs[0]); + sysfs_bin_attr_init(bin_attrs[0]); return sysfs_create_group(&adev->dev->kobj, &group); } @@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) &con->features_attr.attr, NULL }; + struct bin_attribute *bin_attrs[] = { + &con->badpages_attr, + NULL + }; struct attribute_group group = { .name = "ras", .attrs = attrs, + .bin_attrs = bin_attrs, }; sysfs_remove_group(&adev->dev->kobj, &group); @@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* recovery begin */ + +/* return 0 on success. + * caller need free bps. + */ +static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, + struct ras_badpage **bps, unsigned int *count) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data; + int i = 0; + int ret = 0; + + if (!con || !con->eh_data || !bps || !count) + return -EINVAL; + + mutex_lock(&con->recovery_lock); + data = con->eh_data; + if (!data || data->count == 0) { + *bps = NULL; + goto out; + } + + *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL); + if (!*bps) { + ret = -ENOMEM; + goto out; + } + + for (; i < data->count; i++) { + (*bps)[i] = (struct ras_badpage){ + .bp = data->bps[i].bp, + .size = AMDGPU_GPU_PAGE_SIZE, + .flags = 0, + }; + + if (data->last_reserved <= i) + (*bps)[i].flags = 1; + else if (data->bps[i].bo == NULL) + (*bps)[i].flags = 2; + } + + *count = data->count; +out: + mutex_unlock(&con->recovery_lock); + return ret; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) } /* recovery end */ +/* return 0 if ras will reset gpu and repost.*/ +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (!ras) + return -EINVAL; + + ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET; + return 0; +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -1415,8 +1593,10 @@ recovery_out: return -EINVAL; } -/* do some init work after IP late init as dependence */ -void amdgpu_ras_post_init(struct amdgpu_device *adev) +/* do some init work after IP late init as dependence. + * and it runs in resume/gpu reset/booting up cases. + */ +void amdgpu_ras_resume(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev) } } } + + if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) { + con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET; + /* setup ras obj state as disabled. + * for init_by_vbios case. + * if we want to enable ras, just enable it in a normal way. + * If we want do disable it, need setup ras obj as enabled, + * then issue another TA disable cmd. + * See feature_enable_on_boot + */ + amdgpu_ras_disable_all_features(adev, 1); + amdgpu_ras_reset_gpu(adev, 0); + } +} + +void amdgpu_ras_suspend(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (!con) + return; + + amdgpu_ras_disable_all_features(adev, 0); + /* Make sure all ras objects are disabled. */ + if (con->features) + amdgpu_ras_disable_all_features(adev, 1); } /* do some fini work before IP fini as dependence */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index eaef5edefc34..c6b34fbd695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -93,6 +93,7 @@ struct amdgpu_ras { struct dentry *ent; /* sysfs */ struct device_attribute features_attr; + struct bin_attribute badpages_attr; /* block array */ struct ras_manager *objs; @@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, return ras && (ras->supported & (1 << block)); } +int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, + unsigned int block); + +void amdgpu_ras_resume(struct amdgpu_device *adev); +void amdgpu_ras_suspend(struct amdgpu_device *adev); + int amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); @@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev, bool is_baco) { - /* remove me when gpu reset works on vega20 A1. */ -#if 0 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); -#endif return 0; } @@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); -void amdgpu_ras_post_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d7fae2676269..cdddce938bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -114,6 +114,7 @@ struct amdgpu_ring_funcs { uint32_t align_mask; u32 nop; bool support_64bit_ptrs; + bool no_user_fence; unsigned vmhub; unsigned extra_dw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0c52d1f9fe0f..7138dc1dd1f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,6 +43,7 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/iommu.h> +#include <linux/hmm.h> #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -703,143 +704,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + struct hmm_range *ranges; + int nr_ranges; +#endif }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + +/* Support Userptr pages cross max 16 vmas */ +#define MAX_NR_VMAS (16) + int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long start = gtt->userptr; + unsigned long end = start + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; + struct hmm_range *ranges; + unsigned long nr_pages, i; + uint64_t *pfns, f; + int r = 0; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; - down_read(&mm->mmap_sem); - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + /* user pages may cross multiple VMAs */ + gtt->nr_ranges = 0; + do { + unsigned long vm_start; - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; + if (gtt->nr_ranges >= MAX_NR_VMAS) { + DRM_ERROR("Too many VMAs in userptr range\n"); + r = -EFAULT; + goto out; } + + vm_start = vma ? vma->vm_end : start; + vma = find_vma(mm, vm_start); + if (unlikely(!vma || vm_start < vma->vm_start)) { + r = -EFAULT; + goto out; + } + vmas[gtt->nr_ranges++] = vma; + } while (end > vma->vm_end); + + DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", + start, gtt->nr_ranges, ttm->num_pages); + + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vmas[0]->vm_file)) { + r = -EPERM; + goto out; } - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); + if (unlikely(!ranges)) { + r = -ENOMEM; + goto out; + } - guptask.task = current; - spin_lock(>t->guptasklock); - list_add(&guptask.list, >t->guptasks); - spin_unlock(>t->guptasklock); + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_ranges; + } - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); + for (i = 0; i < gtt->nr_ranges; i++) + amdgpu_hmm_init_range(&ranges[i]); - spin_lock(>t->guptasklock); - list_del(&guptask.list); - spin_unlock(>t->guptasklock); + f = ranges[0].flags[HMM_PFN_VALID]; + f |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : ranges[0].flags[HMM_PFN_WRITE]; + memset64(pfns, f, ttm->num_pages); - if (r < 0) - goto release_pages; + for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { + ranges[i].vma = vmas[i]; + ranges[i].start = max(start, vmas[i]->vm_start); + ranges[i].end = min(end, vmas[i]->vm_end); + ranges[i].pfns = pfns + nr_pages; + nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; - pinned += r; + r = hmm_vma_fault(&ranges[i], true); + if (unlikely(r)) + break; + } + if (unlikely(r)) { + while (i--) + hmm_vma_range_done(&ranges[i]); - } while (pinned < ttm->num_pages); + goto out_free_pfns; + } up_read(&mm->mmap_sem); + + for (i = 0; i < ttm->num_pages; i++) { + pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); + if (!pages[i]) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, pfns[i]); + goto out_invalid_pfn; + } + } + gtt->ranges = ranges; + return 0; -release_pages: - release_pages(pages, pinned); +out_free_pfns: + kvfree(pfns); +out_free_ranges: + kvfree(ranges); +out: up_read(&mm->mmap_sem); + return r; + +out_invalid_pfn: + for (i = 0; i < gtt->nr_ranges; i++) + hmm_vma_range_done(&ranges[i]); + kvfree(pfns); + kvfree(ranges); + return -ENOMEM; } /** - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated * - * Called by amdgpu_cs_list_validate(). This creates the page list - * that backs user memory and will ultimately be mapped into the device - * address space. + * Returns: true if pages are still valid */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; + bool r = false; + int i; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); + if (!gtt || !gtt->userptr) + return false; - ttm->pages[i] = pages ? pages[i] : NULL; + DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", + gtt->userptr, gtt->nr_ranges, ttm->num_pages); + + WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, + "No user pages to check\n"); + + if (gtt->ranges) { + for (i = 0; i < gtt->nr_ranges; i++) + r |= hmm_vma_range_done(>t->ranges[i]); + kvfree(gtt->ranges[0].pfns); + kvfree(gtt->ranges); + gtt->ranges = NULL; } + + return r; } +#endif /** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * - * Called while unpinning userptr pages + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; + unsigned long i; - if (!page) - continue; - - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i] = pages ? pages[i] : NULL; } /** @@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + if (gtt->ranges && + ttm->pages[0] == hmm_pfn_to_page(>t->ranges[0], + gtt->ranges[0].pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(>t->guptasklock); - INIT_LIST_HEAD(>t->guptasks); - atomic_set(>t->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(>t->guptasklock); - list_for_each_entry(entry, >t->guptasks, list) { - if (entry->task == current) { - spin_unlock(>t->guptasklock); - return false; - } - } - spin_unlock(>t->guptasklock); - - atomic_inc(>t->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? - */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(>t->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; + return true; } /** @@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize various on-chip memory pools */ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size); + adev->gds.gds_size); if (r) { DRM_ERROR("Failed initializing GDS heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - 4, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size); + adev->gds.gws_size); if (r) { DRM_ERROR("Failed initializing gws heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size); + adev->gds.oa_size); if (r) { DRM_ERROR("Failed initializing oa heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index b5b2d101f7db..c2b7669004ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +#else +static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +{ + return -EPERM; +} +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +{ + return false; +} +#endif + void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); @@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 7b33867036e7..33c1eb76c076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; } +#define FW_VERSION_ATTR(name, mode, field) \ +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct drm_device *ddev = dev_get_drvdata(dev); \ + struct amdgpu_device *adev = ddev->dev_private; \ + \ + return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \ +} \ +static DEVICE_ATTR(name, mode, show_##name, NULL) + +FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version); +FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version); +FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version); +FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version); +FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version); +FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version); +FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version); +FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version); +FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version); +FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version); +FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version); +FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version); +FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version); +FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version); +FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version); +FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version); +FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); +FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); +FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); +FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); + +static struct attribute *fw_attrs[] = { + &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, + &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr, + &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr, + &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr, + &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr, + &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr, + &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr, + &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr, + &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, + &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, + &dev_attr_dmcu_fw_version.attr, NULL +}; + +static const struct attribute_group fw_attr_group = { + .name = "fw_version", + .attrs = fw_attrs +}; + +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) +{ + return sysfs_create_group(&adev->dev->kobj, &fw_attr_group); +} + +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev) +{ + sysfs_remove_group(&adev->dev->kobj, &fw_attr_group); +} + static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, struct amdgpu_firmware_info *ucode, uint64_t mc_addr, void *kptr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7ac25a1c7853..ec4c2ea1f05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev); void amdgpu_ucode_free_bo(struct amdgpu_device *adev); +void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ecf6f96df2ad..118451f5e3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) return 0; } -static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, - struct dpg_pause_state *new_state) -{ - int ret_code; - uint32_t reg_data = 0; - uint32_t reg_data2 = 0; - struct amdgpu_ring *ring; - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.fw_based != new_state->fw_based) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); - - if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* pause DPG non-jpeg */ - reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg non-jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.fw_based = new_state->fw_based; - } - - /* pause/unpause if state is changed */ - if (adev->vcn.pause_state.jpeg != new_state->jpeg) { - DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", - adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, - new_state->fw_based, new_state->jpeg); - - reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & - (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); - - if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { - ret_code = 0; - - if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - - if (!ret_code) { - /* Make sure JPRG Snoop is disabled before sending the pause */ - reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); - reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); - - /* pause DPG jpeg */ - reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, - UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); - - /* Restore */ - ring = &adev->vcn.ring_jpeg; - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, - UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - - ring = &adev->vcn.ring_dec; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); - SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, - UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); - } - } else { - /* unpause dpg jpeg, no need to wait */ - reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; - WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); - } - adev->vcn.pause_state.jpeg = new_state->jpeg; - } - - return 0; -} - static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); @@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - amdgpu_vcn_pause_dpg_mode(adev, &new_state); + adev->vcn.pause_dpg_mode(adev, &new_state); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a0ad19af9080..a1ee19251aae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,27 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, @@ -81,6 +102,8 @@ struct amdgpu_vcn { unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 7d484fad3909..1f0bd4d16475 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest) return clk; } +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (virt->ops && virt->ops->init_reg_access_mode) + virt->ops->init_reg_access_mode(adev); +} + +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) + ret = true; + + return ret; +} + +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) + && !(amdgpu_sriov_runtime(adev))) + ret = true; + + return ret; +} + +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) +{ + bool ret = false; + struct amdgpu_virt *virt = &adev->virt; + + if (amdgpu_sriov_vf(adev) + && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) + ret = true; + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 584947b7ccf3..dca25deee75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer { uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; +/* According to the fw feature, some new reg access modes are supported */ +#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ +#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ +#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ +#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -59,6 +65,7 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); + void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -258,6 +265,7 @@ struct amdgpu_virt { uint32_t gim_feature; /* protect DPM events to GIM */ struct mutex dpm_mutex; + uint32_t reg_access_mode; }; #define amdgpu_sriov_enabled(adev) \ @@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); +void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); +bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); +bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a48c84c51775..d11eba09eadd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) return &hive->device_list; } +/** + * DOC: AMDGPU XGMI Support + * + * XGMI is a high speed interconnect that joins multiple GPU cards + * into a homogeneous memory space that is organized by a collective + * hive ID and individual node IDs, both of which are 64-bit numbers. + * + * The file xgmi_device_id contains the unique per GPU device ID and + * is stored in the /sys/class/drm/card${cardno}/device/ directory. + * + * Inside the device directory a sub-directory 'xgmi_hive_info' is + * created which contains the hive ID and the list of nodes. + * + * The hive ID is stored in: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id + * + * The node information is stored in numbered directories: + * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id + * + * Each device has their own xgmi_hive_info direction with a mirror + * set of node sub-directories. + * + * The XGMI memory space is built by contiguously adding the power of + * two padded VRAM space from each node to each other. + * + */ + + static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev, struct device_attribute *attr, char *buf) { @@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev /* Each psp need to set the latest topology */ ret = psp_xgmi_set_topology_info(&adev->psp, hive->number_devices, - &hive->topology_info); + &adev->psp.xgmi_context.top_info); if (ret) dev_err(adev->dev, "XGMI: Set topology failure on device %llx, hive %llx, ret %d", @@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev return ret; } + +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) + if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) + return top->nodes[i].num_hops; + return -EINVAL; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { - struct psp_xgmi_topology_info *hive_topology; + struct psp_xgmi_topology_info *top_info; struct amdgpu_hive_info *hive; struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; @@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - hive_topology = &hive->topology_info; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) - hive_topology->nodes[count++].node_id = entry->node_id; + top_info->nodes[count++].node_id = entry->node_id; + top_info->num_nodes = count; hive->number_devices = count; - /* Each psp need to get the latest topology */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; + } + + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); if (ret) { dev_err(tmp_adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", tmp_adev->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret); /* To do : continue with some node failed or disable the whole hive */ - break; + goto exit; } } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - break; - } - if (!ret) ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive); + + mutex_unlock(&hive->hive_lock); +exit: if (!ret) dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n", adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); @@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret); - - mutex_unlock(&hive->hive_lock); -exit: return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 3e9c91e9a4bf..fbcee31788c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -27,7 +27,6 @@ struct amdgpu_hive_info { uint64_t hive_id; struct list_head device_list; - struct psp_xgmi_topology_info topology_info; int number_devices; struct mutex hive_lock, reset_lock; struct kobject *kobj; @@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev); static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 07c1f239e9c3..3a4f20766a39 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &cik_get_pcie_usage, .need_reset_on_init = &cik_need_reset_on_init, + .get_pcie_replay_count = &cik_get_pcie_replay_count, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index d5ebe566809b..8c09bf994acd 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } +/* hold counter assignment per gpu struct */ +struct df_v3_6_event_mask { + struct amdgpu_device gpu; + uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; +}; + +/* get assigned df perfmon ctr as int */ +static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config, + int *counter) +{ + struct df_v3_6_event_mask *mask; + int i; + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { + *counter = i; + return; + } + } +} + +/* get address based on counter assignment */ +static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, + uint64_t config, + int is_ctrl, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + + int target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr < 0) + return; + + switch (target_cntr) { + + case 0: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0; + break; + case 1: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1; + break; + case 2: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2; + break; + case 3: + *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3; + *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3; + break; + + } + +} + +/* get read counter address */ +static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr) +{ + df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +} + +/* get control counter settings i.e. address and values to set */ +static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, + uint64_t config, + uint32_t *lo_base_addr, + uint32_t *hi_base_addr, + uint32_t *lo_val, + uint32_t *hi_val) +{ + + uint32_t eventsel, instance, unitmask; + uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; + + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + + if (lo_val == NULL || hi_val == NULL) + return; + + if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { + DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x", + *lo_base_addr, *hi_base_addr); + return; + } + + eventsel = GET_EVENT(config); + instance = GET_INSTANCE(config); + unitmask = GET_UNITMASK(config); + + es_5_0 = eventsel & 0x3FUL; + es_13_6 = instance; + es_13_0 = (es_13_6 << 6) + es_5_0; + es_13_12 = (es_13_0 & 0x03000UL) >> 12; + es_11_8 = (es_13_0 & 0x0F00UL) >> 8; + es_7_0 = es_13_0 & 0x0FFUL; + *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); + *hi_val = (es_11_8 | ((es_13_12)<<(29))); +} + +/* assign df performance counters for read */ +static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, + uint64_t config, + int *is_assigned) +{ + + struct df_v3_6_event_mask *mask; + int i, target_cntr; + + target_cntr = -1; + + *is_assigned = 0; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + if (target_cntr >= 0) { + *is_assigned = 1; + return 0; + } + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { + if (mask->config_assign_mask[i] == 0ULL) { + mask->config_assign_mask[i] = config & 0x0FFFFFFUL; + return 0; + } + } + + return -ENOSPC; +} + +/* release performance counter */ +static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, + uint64_t config) +{ + + struct df_v3_6_event_mask *mask; + int target_cntr; + + target_cntr = -1; + + df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + + mask = container_of(adev, struct df_v3_6_event_mask, gpu); + + if (target_cntr >= 0) + mask->config_assign_mask[target_cntr] = 0ULL; + +} + +/* + * get xgmi link counters via programmable data fabric (df) counters (max 4) + * using cake tx event. + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @count -> counters to pass + * + */ + +static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + uint64_t *count) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + lo_val = RREG32_PCIE(lo_base_addr); + hi_val = RREG32_PCIE(hi_base_addr); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); +} + +/* + * reset xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ +static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr; + uint64_t config; + + config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); + + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + WREG32_PCIE(lo_base_addr, 0UL); + WREG32_PCIE(hi_base_addr, 0UL); +} + +/* + * add xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * + */ + +static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, + int instance) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + uint64_t config; + int ret, is_assigned; + + if (instance < 0 || instance > 1) + return -EINVAL; + + config = GET_INSTANCE_CONFIG(instance); + + ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); + + if (ret || is_assigned) + return ret; + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + &lo_val, + &hi_val); + + WREG32_PCIE(lo_base_addr, lo_val); + WREG32_PCIE(hi_base_addr, hi_val); + + return ret; +} + + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either resume or assign event via df perfmon + * + */ + +static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_enable) +{ + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + int ret; + + if (instance < 0 || instance > 1) + return -EINVAL; + + if (is_enable) { + + ret = df_v3_6_add_xgmi_link_cntr(adev, instance); + + if (ret) + return ret; + + } else { + + config = GET_INSTANCE_CONFIG(instance); + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if (lo_base_addr == 0) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + + ret = 0; + } + + return ret; + +} + +/* + * start xgmi link counters + * + * @adev -> amdgpu device + * @instance-> currently cake has 2 links to poll on vega20 + * @is_enable -> either pause or unassign event via df perfmon + * + */ + +static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, + int instance, + int is_disable) +{ + + uint32_t lo_base_addr, hi_base_addr, lo_val; + uint64_t config; + + config = GET_INSTANCE_CONFIG(instance); + + if (is_disable) { + df_v3_6_reset_xgmi_link_cntr(adev, instance); + df_v3_6_pmc_release_cntr(adev, config); + } else { + + df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return -EINVAL; + + lo_val = RREG32_PCIE(lo_base_addr); + + WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + } + + return 0; +} + +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, + int is_enable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) + ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, + is_enable); + + if (ret) + return ret; + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, + int is_disable) +{ + int xgmi_tx_link, ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) { + ret = df_v3_6_stop_xgmi_link_cntr(adev, + xgmi_tx_link, + is_disable); + if (ret) + return ret; + } + + ret = 0; + break; + default: + break; + } + + return ret; +} + +static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, + uint64_t config, + uint64_t *count) +{ + + int xgmi_tx_link; + + switch (adev->asic_type) { + case CHIP_VEGA20: + xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 + : (IS_DF_XGMI_1_TX(config) ? 1 : -1); + + if (xgmi_tx_link >= 0) { + df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); + df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); + } + + break; + default: + break; + } + +} + const struct amdgpu_df_funcs df_v3_6_funcs = { .init = df_v3_6_init, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, @@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = { .update_medium_grain_clock_gating = df_v3_6_update_medium_grain_clock_gating, .get_clockgating_state = df_v3_6_get_clockgating_state, + .pmc_start = df_v3_6_pmc_start, + .pmc_stop = df_v3_6_pmc_stop, + .pmc_get_count = df_v3_6_pmc_get_count }; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index e79c58e5efcb..fcffd807764d 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,6 +35,23 @@ enum DF_V3_6_MGCG { DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define AMDGPU_DF_MAX_COUNTERS 4 + +/* get flags from df perfmon config */ +#define GET_EVENT(x) (x & 0xFFUL) +#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \ + | ((0x046ULL + x) << 8) \ + | (0x02 << 16)) + +/* df event conf macros */ +#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) +#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ + && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) + extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a59e0fdf5a97..4cd1731d62fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle) static int gfx_v7_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 02955e6e9dd9..25400b708722 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle) static int gfx_v8_0_sw_fini(void *handle) { - int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + int i; for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; + adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE); + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID); - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } } static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ba67d1023264..c763733619fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -34,6 +34,7 @@ #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" +#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) /* GDS reserve memory: 64 bytes alignment */ adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); - adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); @@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + + (adev->gds.gds_size + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); @@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle) kfree(ras_if); } - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); } static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); @@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) */ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE, + WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << @@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_0_init_csb(struct amdgpu_device *adev) { /* csib */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), adev->gfx.rlc.clear_state_gpu_addr >> 32); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), adev->gfx.rlc.clear_state_size); } @@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].sched.ready = false; } - WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); } @@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) int i; if (enable) { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); } else { - WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].sched.ready = false; @@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* disable wptr polling */ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); /* enable doorbell? */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) break; udelay(1); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); } /* set the pointer to the MQD */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); /* set MQD vmid to 0 */ - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, mqd->cp_mqd_control); /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); /* set the wb address whether it's enabled or not */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi); /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); /* enable the doorbell if requested */ @@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control); /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); /* set the vmid for the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); /* activate the queue */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); if (ring->use_doorbell) @@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) /* disable the queue if it's active */ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) @@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) DRM_DEBUG("KIQ dequeue request failed.\n"); /* Manual disable if dequeue request times out */ - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0); } - WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); return 0; } @@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static const u32 vgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x000000f8, 0xbf110800, + 0x7e000280, 0x7e020280, + 0x7e040280, 0x7e060280, + 0x7e080280, 0x7e0a0280, + 0x7e0c0280, 0x7e0e0280, + 0x80808800, 0xbe803200, + 0xbf84fff5, 0xbf9c0000, + 0xd28c0001, 0x0001007f, + 0xd28d0001, 0x0002027e, + 0x10020288, 0xb8810904, + 0xb7814000, 0xd1196a01, + 0x00000301, 0xbe800087, + 0xbefc00c1, 0xd89c4000, + 0x00020201, 0xd89cc080, + 0x00040401, 0x320202ff, + 0x00000800, 0x80808100, + 0xbf84fff8, 0x7e020280, + 0xbf810000, 0x00000000, +}; + +static const u32 sgpr_init_compute_shader[] = +{ + 0xb07c0000, 0xbe8000ff, + 0x0000005f, 0xbee50080, + 0xbe812c65, 0xbe822c65, + 0xbe832c65, 0xbe842c65, + 0xbe852c65, 0xb77c0005, + 0x80808500, 0xbf84fff8, + 0xbe800080, 0xbf810000, +}; + +static const struct soc15_reg_entry vgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ +}; + +static const struct soc15_reg_entry sgpr_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sec_ded_counter_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, +}; + +static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + int r, i, j; + unsigned total_size, vgpr_offset, sgpr_offset; + u64 gpu_addr; + + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + /* bail if the compute ring is not ready */ + if (!ring->sched.ready) + return 0; + + total_size = + ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size = ALIGN(total_size, 256); + vgpr_offset = total_size; + total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); + sgpr_offset = total_size; + total_size += sizeof(sgpr_init_compute_shader); + + /* allocate an indirect buffer to put the commands in */ + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) + ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; + + for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) + ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; + + /* init the ib length to 0 */ + ib.length_dw = 0; + + /* VGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* SGPR */ + /* write the register state for the compute dispatch */ + for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; + } + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); + ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); + + /* write dispatch packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib.ptr[ib.length_dw++] = 128; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* write CS partial flush packet */ + ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); + ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); + + /* shedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) { + DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); + goto fail; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(f, false); + if (r) { + DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + goto fail; + } + + /* read back registers to clear the counters */ + mutex_lock(&adev->grbm_idx_mutex); + for (j = 0; j < 16; j++) { + gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); + mutex_unlock(&adev->grbm_idx_mutex); + +fail: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); + + return r; +} + static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } - if (*ras_if) + /* requires IBs so do in late init after IB pool is initialized */ + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) + return r; + + /* handle resume path. */ + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__GFX); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -3614,7 +3876,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int gfx_v9_0_late_init(void *handle) @@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); - WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: - adev->gds.mem.total_size = 0x1000; + adev->gds.gds_size = 0x1000; break; default: - adev->gds.mem.total_size = 0x10000; + adev->gds.gds_size = 0x10000; break; } @@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } - adev->gds.gws.total_size = 64; - adev->gds.oa.total_size = 16; - - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 7bb5359d0bbd..0dc8926111e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; /* Program the AGP BAR */ - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) @@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * workaround that increase system aperture high address (add 1) * to get rid of the VM fault and hardware hang. */ - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ @@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); } static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) @@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) * VF copy registers so vbios post doesn't program them, for * SRIOV driver need to program them */ - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); - WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); } @@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 8a3b5e6fc6c9..8bf2ba310fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -289,7 +289,7 @@ out: * * @adev: amdgpu_device pointer * - * Load the GDDR MC ucode into the hw (CIK). + * Load the GDDR MC ucode into the hw (VI). * Returns 0 on success, error on failure. */ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) @@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * * Set the location of vram, gart, and AGP in the GPU's - * physical address space (CIK). + * physical address space (VI). */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { @@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Look up the amount of vram, vram width, and decide how to place - * vram and gart within the GPU's physical address space (CIK). + * vram and gart within the GPU's physical address space (VI). * Returns 0 for success. */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) @@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @vmid: vm instance to flush * - * Flush the TLB for the requested page table (CIK). + * Flush the TLB for the requested page table (VI). */ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t flush_type) @@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) * This sets up the TLBs, programs the page tables for VMID0, * sets up the hw for VMIDs 1-15 which are allocated on * demand, and sets up the global locations for the LDS, GDS, - * and GPUVM for FSA64 clients (CIK). + * and GPUVM for FSA64 clients (VI). * Returns 0 for success, errors for failure. */ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) @@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * This disables all VM page table (CIK). + * This disables all VM page table (VI). */ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) { @@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * - * Print human readable fault information (CIK). + * Print human readable fault information (VI). */ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client, unsigned pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3b7370d914a5..602593bab7a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle) return 0; } /* handle resume path. */ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__UMC); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -731,7 +754,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } @@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + if (amdgpu_virt_support_skip_setting(adev)) + break; + /* fall through */ case CHIP_VEGA20: soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, @@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + /* After HDP is initialized, flush HDP.*/ adev->nbio_funcs->hdp_flush(adev, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 41a9a5779623..05d1d448c8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset; @@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; + if (amdgpu_virt_support_skip_setting(adev)) + return; + /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { + if (amdgpu_virt_support_skip_setting(adev)) + return; + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, @@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + if (!amdgpu_virt_support_skip_setting(adev)) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0); + } } /** @@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + + if (amdgpu_virt_support_skip_setting(adev)) + return; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2471e7cf75ea..31030f86be86 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -26,6 +26,7 @@ #include "nbio/nbio_6_1_sh_mask.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" +#include "mp/mp_9_0_offset.h" #include "soc15.h" #include "vega10_ih.h" #include "soc15_common.h" @@ -343,7 +344,7 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) - && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) + && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT) amdgpu_device_gpu_recover(adev, NULL); } @@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } +static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); + uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + + adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; + + if (rlc_fw_ver >= 0x5d) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; + + if (sos_fw_ver >= 0x80455) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; + + if (sos_fw_ver >= 0x8045b) + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; +} + const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, @@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .trans_msg = xgpu_ai_mailbox_trans_msg, .get_pp_clk = xgpu_ai_get_pp_clk, .force_dpm_level = xgpu_ai_force_dpm_level, + .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1cdb98ad2db3..73419fa38159 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -29,9 +29,18 @@ #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .ih_control = nbio_v7_0_ih_control, .init_registers = nbio_v7_0_init_registers, .detect_hw_virt = nbio_v7_0_detect_hw_virt, + .remap_hdp_registers = nbio_v7_0_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c69d51598cfe..bfaaa327ae3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -27,9 +27,18 @@ #include "nbio/nbio_7_4_offset.h" #include "nbio/nbio_7_4_sh_mask.h" #include "nbio/nbio_7_4_0_smn.h" +#include <uapi/linux/kfd_ioctl.h> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .ih_control = nbio_v7_4_ih_control, .init_registers = nbio_v7_4_init_registers, .detect_hw_virt = nbio_v7_4_detect_hw_virt, + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 2f79765b4bdb..7f8edc66ddff 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -94,6 +94,7 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ + GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ }; @@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to setup register program */ +struct psp_gfx_cmd_reg_prog { + uint32_t reg_value; + uint32_t reg_id; +}; + /* All GFX ring buffer commands. */ union psp_gfx_commands { @@ -226,6 +233,7 @@ union psp_gfx_commands struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; + struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog; }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 143f0fae69d5..3f5827764df0 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp); +static int psp_v3_1_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type); + static int psp_v3_1_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp, psp_v3_1_reroute_ih(psp); - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + if (psp_v3_1_support_vmr_ring(psp)) { + ret = psp_v3_1_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + /* No size initialization for sriov */ + /* Write the ring initialization command to C2PMSG_101 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_101), 0x80000000, + 0x8000FFFF, false); + } else { + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be hardware handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, + mmMP0_SMN_C2PMSG_64), 0x80000000, + 0x8000FFFF, false); + } return ret; } @@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - psp_ring_reg = 3 << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + if (psp_v3_1_support_vmr_ring(psp)) { + /* Write the Destroy GPCOM ring command to C2PMSG_101 */ + psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } return ret; } @@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_v3_1_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_v3_1_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + /* send interrupt to PSP for SRIOV ring write pointer update */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) return 0; } +static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + return true; + + return false; +} + static const struct psp_funcs psp_v3_1_funcs = { .init_microcode = psp_v3_1_init_microcode, .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv, @@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = { .compare_sram_data = psp_v3_1_compare_sram_data, .smu_reload_quirk = psp_v3_1_smu_reload_quirk, .mode1_reset = psp_v3_1_mode1_reset, + .support_vmr_ring = psp_v3_1_support_vmr_ring, }; void psp_v3_1_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9c88ce513d78..7a259c5b6c62 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); + if (!amdgpu_virt_support_skip_setting(adev)) { + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); + } break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle) } /* handle resume path. */ - if (*ras_if) + if (*ras_if) { + /* resend ras TA enable cmd during resume. + * prepare to handle failure. + */ + ih_info.head = **ras_if; + r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); + if (r) { + if (r == -EAGAIN) { + /* request a gpu reset. will run again. */ + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + return 0; + } + /* fail to enable ras, cleanup all. */ + goto irq; + } + /* enable successfully. continue. */ goto resume; + } *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); if (!*ras_if) @@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle) **ras_if = ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); - if (r) + if (r) { + if (r == -EAGAIN) { + amdgpu_ras_request_reset_on_boot(adev, + AMDGPU_RAS_BLOCK__SDMA); + r = 0; + } goto feature; + } ih_info.head = **ras_if; fs_info.head = **ras_if; @@ -1571,7 +1596,7 @@ interrupt: feature: kfree(*ras_if); *ras_if = NULL; - return -EINVAL; + return r; } static int sdma_v4_0_sw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d8df68893b9..4ff930a47e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, @@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .need_full_reset = &si_need_full_reset, .get_pcie_usage = &si_get_pcie_usage, .need_reset_on_init = &si_need_reset_on_init, + .get_pcie_replay_count = &si_get_pcie_replay_count, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7e594c2bfb4..d9fdd95fd6e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -44,6 +44,7 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" #include "nbio/nbio_7_0_smn.h" #include "mp/mp_9_0_offset.h" @@ -64,6 +65,9 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" +#include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include <uapi/linux/kfd_ioctl.h> #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 @@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); + WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) @@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, tmp &= ~(entry->and_mask); tmp |= entry->or_mask; } - WREG32(reg, tmp); + + if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) || + reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) || + reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)) + WREG32_RLC(reg, tmp); + else + WREG32(reg, tmp); + } } @@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev) soc15_asic_get_baco_capability(adev, &baco_reset); else baco_reset = false; + if (baco_reset) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (hive || (ras && ras->supported)) + baco_reset = false; + } break; default: baco_reset = false; @@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { - if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - else - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + + /* For Vega10 SR-IOV, PSP need to be initialized before IH */ + if (amdgpu_sriov_vf(adev)) { + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + } else { + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + } } amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); @@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) /* Just return false for soc15 GPUs. Reset does not seem to * be necessary. */ - return false; + if (!amdgpu_passthrough(adev)) + return false; if (adev->flags & AMD_IS_APU) return false; @@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, }; static int soc15_common_early_init(void *handle) { +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; adev->smc_rreg = NULL; adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; @@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) int i; struct amdgpu_ring *ring; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - adev->nbio_funcs->sdma_doorbell_range(adev, i, - ring->use_doorbell, ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range); + /* Two reasons to skip + * 1, Host driver already programmed them + * 2, To avoid registers program violations in SR-IOV + */ + if (!amdgpu_virt_support_skip_setting(adev)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + adev->nbio_funcs->sdma_doorbell_range(adev, i, + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } } adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, @@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle) soc15_program_aspm(adev); /* setup nbio registers */ adev->nbio_funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * for the purpose of expose those registers + * to process space + */ + if (adev->nbio_funcs->remap_hdp_registers) + adev->nbio_funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index a66c8bfbbaa6..06f39f5bbf76 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -42,8 +42,18 @@ struct soc15_reg_golden { u32 or_mask; }; +struct soc15_reg_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t reg_value; +}; + #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) + #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 49c262540940..47f74dab365d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,26 +69,60 @@ } \ } while (0) -#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ - ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__MASK_EN_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ - RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) +#define WREG32_RLC(reg, value) \ + do { \ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t i = 0; \ + uint32_t retries = 50000; \ + uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ + uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \ + uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \ + WREG32(r0, value); \ + WREG32(r1, (reg | 0x80000000)); \ + WREG32(spare_int, 0x1); \ + for (i = 0; i < retries; i++) { \ + u32 tmp = RREG32(r1); \ + if (!(tmp & 0x80000000)) \ + break; \ + udelay(10); \ + } \ + if (i >= retries) \ + pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \ + } else { \ + WREG32(reg, value); \ + } \ + } while (0) -#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ +#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ - WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ - UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ - ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ - << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ - (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ + if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ + uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ + uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ + uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \ + if (target_reg == grbm_cntl) \ + WREG32(r2, value); \ + else if (target_reg == grbm_idx) \ + WREG32(r3, value); \ + WREG32(target_reg, value); \ + } else { \ + WREG32(target_reg, value); \ + } \ } while (0) -#endif +#define WREG32_SOC15_RLC(ip, inst, reg, value) \ + do { \ + uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ + WREG32_RLC(target_reg, value); \ + } while (0) + +#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ + WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ + WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value) +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c4fb58667fd4..bf3385280d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 52bd8a654734..3210a7bd9a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index c9edddf9f88a..c61a314c56cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = uvd_v6_0_enc_ring_get_rptr, .get_wptr = uvd_v6_0_enc_ring_get_wptr, .set_wptr = uvd_v6_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 2191d3d0a219..cdb96d4cb424 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, @@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = HEVC_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_enc_ring_get_rptr, .get_wptr = uvd_v7_0_enc_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 40363ca6c5f1..ab0cb8325796 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6ec65cf11112..36902ec16dcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .align_mask = 0xf, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index c0ec27991c22..e267b073f525 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3dbc51f9d3b9..bb47f5b24be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); /** * vcn_v1_0_early_init - set function pointers @@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; - return r; + adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + + return 0; } /** @@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) return r; } +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, @@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, @@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .align_mask = 0xf, .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, + .no_user_fence = true, .vmhub = AMDGPU_MMHUB, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 8d89ab7f0ae8..5f54acc70fec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } adev->irq.ih.enabled = true; if (adev->irq.ih1.ring_size) { ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } adev->irq.ih1.enabled = true; } @@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } adev->irq.ih2.enabled = true; } } @@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); @@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } + /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); @@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + } /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, @@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); @@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + + if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, + ih_rb_cntl)) { + DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + } /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 5e5b42a0744a..b8adf3808de2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev) +{ + uint64_t nak_r, nak_g; + + /* Get the number of NAKs received and generated */ + nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK); + nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED); + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + static bool vi_need_reset_on_init(struct amdgpu_device *adev) { u32 clock_cntl, pc; @@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .init_doorbell_index = &legacy_doorbell_index_init, .get_pcie_usage = &vi_get_pcie_usage, .need_reset_on_init = &vi_need_reset_on_init, + .get_pcie_replay_count = &vi_get_pcie_replay_count, }; #define CZ_REV_BRISTOL(rev) \ |