Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
184 files changed, 17286 insertions, 3251 deletions
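Among the infrastructure added below, the amdgpu.h hunk introduces a for_each_inst() iterator for walking an instance mask (XCC/AID/SDMA instances) bit by bit. A minimal userspace sketch of its iteration semantics, assuming POSIX ffs() stands in for the kernel helper; the sample mask and main() are illustrative only, and note that the macro consumes the mask, so callers iterate over a local copy:

#include <stdio.h>
#include <strings.h>	/* ffs(): 1-based index of lowest set bit, 0 if none */

/* Copied from the amdgpu.h hunk in this series */
#define for_each_inst(i, inst_mask) \
	for (i = ffs(inst_mask) - 1; inst_mask; \
	     inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1)

int main(void)
{
	unsigned int mask = 0x2d;	/* instances 0, 2, 3 and 5 present */
	int i;

	for_each_inst(i, mask)		/* prints 0, 2, 3, 5; mask ends up 0 */
		printf("instance %d\n", i);

	return 0;
}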
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 12adca8c7819..b91e79c721e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -69,6 +69,16 @@ config DRM_AMDGPU_USERPTR This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. +config DRM_AMDGPU_WERROR + bool "Force the compiler to throw an error instead of a warning when compiling" + depends on DRM_AMDGPU + depends on EXPERT + depends on !COMPILE_TEST + default n + help + Add -Werror to the build flags for amdgpu.ko. + Only enable this if you are warning code for amdgpu.ko. + source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 415a7fa395c4..8d16f280b695 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -39,6 +39,26 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ -I$(FULL_AMD_PATH)/amdkfd +subdir-ccflags-y := -Wextra +subdir-ccflags-y += -Wunused +subdir-ccflags-y += -Wmissing-prototypes +subdir-ccflags-y += -Wmissing-declarations +subdir-ccflags-y += -Wmissing-include-dirs +subdir-ccflags-y += -Wold-style-definition +subdir-ccflags-y += -Wmissing-format-attribute +# Need this to avoid recursive variable evaluation issues +cond-flags := $(call cc-option, -Wunused-but-set-variable) \ + $(call cc-option, -Wunused-const-variable) \ + $(call cc-option, -Wstringop-truncation) \ + $(call cc-option, -Wpacked-not-aligned) +subdir-ccflags-y += $(cond-flags) +subdir-ccflags-y += -Wno-unused-parameter +subdir-ccflags-y += -Wno-type-limits +subdir-ccflags-y += -Wno-sign-compare +subdir-ccflags-y += -Wno-missing-field-initializers +subdir-ccflags-y += -Wno-override-init +subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror + amdgpu-y := amdgpu_drv.o # add KMS driver @@ -60,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -78,7 +98,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o + nbio_v7_9.o aqua_vanjaram_reg_init.o # add DF block amdgpu-y += \ @@ -183,12 +203,14 @@ amdgpu-y += \ vcn_v2_5.o \ vcn_v3_0.o \ vcn_v4_0.o \ + vcn_v4_0_3.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ jpeg_v2_5.o \ jpeg_v3_0.o \ - jpeg_v4_0.o + jpeg_v4_0.o \ + jpeg_v4_0_3.o # add ATHUB block amdgpu-y += \ @@ -203,6 +225,7 @@ amdgpu-y += \ smuio_v11_0.o \ smuio_v11_0_6.o \ smuio_v13_0.o \ + smuio_v13_0_3.o \ smuio_v13_0_6.o # add reset block @@ -228,6 +251,7 @@ amdgpu-y += \ amdgpu_amdkfd_gfx_v9.o \ amdgpu_amdkfd_arcturus.o \ amdgpu_amdkfd_aldebaran.o \ + amdgpu_amdkfd_gc_9_4_3.o \ amdgpu_amdkfd_gfx_v10.o \ amdgpu_amdkfd_gfx_v10_3.o \ amdgpu_amdkfd_gfx_v11.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 02b827785e39..a84bd4a0c421 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,8 +107,9 
@@ #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" #include "amdgpu_ras.h" +#include "amdgpu_xcp.h" -#define MAX_GPU_INSTANCE 16 +#define MAX_GPU_INSTANCE 64 struct amdgpu_gpu_instance { @@ -212,6 +213,8 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; +extern int amdgpu_mtype_local; +extern bool enforce_isolation; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -242,9 +245,10 @@ extern int amdgpu_num_kcq; extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern int amdgpu_user_partt_mode; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) -#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) @@ -282,6 +286,7 @@ extern int amdgpu_sg_display; #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) +struct amdgpu_xcp_mgr; struct amdgpu_device; struct amdgpu_irq_src; struct amdgpu_fpriv; @@ -463,6 +468,8 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + /** GPU partition selection */ + uint32_t xcp_id; }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); @@ -573,6 +580,8 @@ struct amdgpu_asic_funcs { /* query video codecs */ int (*query_video_codecs)(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs); + /* encode "> 32bits" smn addressing */ + u64 (*encode_ext_smn_addressing)(int ext_id); }; /* @@ -607,6 +616,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); +typedef uint32_t (*amdgpu_rreg_ext_t)(struct amdgpu_device*, uint64_t); +typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t); + typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); @@ -657,7 +669,7 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 28 +#define HWIP_MAX_INSTANCE 44 #define HW_ID_MAX 300 #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) @@ -665,6 +677,17 @@ enum amd_hw_ip_block_type { #define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF) #define IP_VERSION_REV(ver) ((ver) & 0xFF) +struct amdgpu_ip_map_info { + /* Map of logical to actual dev instances/mask */ + uint32_t dev_inst[MAX_HWIP][HWIP_MAX_INSTANCE]; + int8_t (*logical_to_dev_inst)(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int8_t inst); + uint32_t (*logical_to_dev_mask)(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + uint32_t mask); +}; + struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; @@ -750,6 +773,7 @@ struct amdgpu_device { struct amdgpu_acp acp; #endif struct amdgpu_hive_info *hive; + struct amdgpu_xcp_mgr *xcp_mgr; /* ASIC */ enum amd_asic_type asic_type; uint32_t family; @@ -797,6 +821,8 @@ struct amdgpu_device { amdgpu_wreg_t pcie_wreg; amdgpu_rreg_t pciep_rreg; amdgpu_wreg_t pciep_wreg; + amdgpu_rreg_ext_t pcie_rreg_ext; + amdgpu_wreg_ext_t pcie_wreg_ext; amdgpu_rreg64_t pcie_rreg64; amdgpu_wreg64_t pcie_wreg64; /* protects concurrent UVD register access */ @@ -830,7 +856,7 @@ struct amdgpu_device { dma_addr_t dummy_page_addr; struct 
amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; - unsigned num_vmhubs; + DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS); /* memory management */ struct amdgpu_mman mman; @@ -962,6 +988,7 @@ struct amdgpu_device { /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + struct amdgpu_ip_map_info ip_map; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -1020,6 +1047,9 @@ struct amdgpu_device { struct pci_saved_state *pci_state; pci_channel_state_t pci_channel_state; + /* Track auto wait count on s_barrier settings */ + bool barrier_has_auto_waitcnt; + struct amdgpu_reset_control *reset_cntl; uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE]; @@ -1050,6 +1080,8 @@ struct amdgpu_device { bool job_hang; bool dc_enabled; + /* Mask of active clusters */ + uint32_t aid_mask; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1081,11 +1113,18 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, void *buf, size_t size, bool write); +uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, + uint32_t inst, uint32_t reg_addr, char reg_name[], + uint32_t expected_value, uint32_t mask); uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); +u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, + u64 reg_addr); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); +void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, + u64 reg_addr, u32 reg_data); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1137,6 +1176,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG32_PCIE_EXT(reg) adev->pcie_rreg_ext(adev, (reg)) +#define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v)) #define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) #define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) @@ -1204,7 +1245,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); /* * ASICs macro. */ -#define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) +#define amdgpu_asic_set_vga_state(adev, state) \ + ((adev)->asic_funcs->set_vga_state ? (adev)->asic_funcs->set_vga_state((adev), (state)) : 0) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) #define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) @@ -1235,6 +1277,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); +#define for_each_inst(i, inst_mask) \ + for (i = ffs(inst_mask) - 1; inst_mask; \ + inst_mask &= ~(1U << i), i = ffs(inst_mask) - 1) + #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) /* Common functions */ @@ -1348,6 +1394,12 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock); /* amdgpu_acpi.c */ +struct amdgpu_numa_info { + uint64_t size; + int pxm; + int nid; +}; + /* ATCS Device/Driver State */ #define AMDGPU_ATCS_PSC_DEV_STATE_D0 0 #define AMDGPU_ATCS_PSC_DEV_STATE_D3_HOT 3 @@ -1365,15 +1417,32 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); +int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, + u64 *tmr_size); +int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_numa_info *numa_info); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); +void amdgpu_acpi_release(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } +static inline int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, + u64 *tmr_offset, u64 *tmr_size) +{ + return -EINVAL; +} +static inline int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, + int xcc_id, + struct amdgpu_numa_info *numa_info) +{ + return -EINVAL; +} static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } +static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index fd6e83795873..385c6acb5728 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include <linux/acpi.h> #include <linux/backlight.h> #include <linux/slab.h> +#include <linux/xarray.h> #include <linux/power_supply.h> #include <linux/pm_runtime.h> #include <linux/suspend.h> @@ -38,6 +39,45 @@ #include "amd_acpi.h" #include "atom.h" +/* Declare GUID for AMD _DSM method for XCCs */ +static const guid_t amd_xcc_dsm_guid = GUID_INIT(0x8267f5d5, 0xa556, 0x44f2, + 0xb8, 0xb4, 0x45, 0x56, 0x2e, + 0x8c, 0x5b, 0xec); + +#define AMD_XCC_HID_START 3000 +#define AMD_XCC_DSM_GET_NUM_FUNCS 0 +#define AMD_XCC_DSM_GET_SUPP_MODE 1 +#define AMD_XCC_DSM_GET_XCP_MODE 2 +#define AMD_XCC_DSM_GET_VF_XCC_MAPPING 4 +#define AMD_XCC_DSM_GET_TMR_INFO 5 +#define AMD_XCC_DSM_NUM_FUNCS 5 + +#define AMD_XCC_MAX_HID 24 + +struct xarray numa_info_xa; + +/* Encapsulates the XCD acpi object information */ +struct amdgpu_acpi_xcc_info { + struct list_head list; + struct amdgpu_numa_info *numa_info; + uint8_t xcp_node; + uint8_t phy_id; + acpi_handle handle; +}; + +struct amdgpu_acpi_dev_info { + struct list_head list; + struct list_head xcc_list; + uint16_t bdf; + uint16_t supp_xcp_mode; + uint16_t xcp_mode; + uint16_t mem_mode; + uint64_t tmr_base; + uint64_t tmr_size; +}; + +struct list_head amdgpu_acpi_dev_list; + struct amdgpu_atif_notification_cfg { bool enabled; int command_code; @@ -801,6 +841,343 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta return r; } +#ifdef CONFIG_ACPI_NUMA +static inline uint64_t 
amdgpu_acpi_get_numa_size(int nid) +{ + /* This is directly using si_meminfo_node implementation as the + * function is not exported. + */ + int zone_type; + uint64_t managed_pages = 0; + + pg_data_t *pgdat = NODE_DATA(nid); + + for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) + managed_pages += + zone_managed_pages(&pgdat->node_zones[zone_type]); + return managed_pages * PAGE_SIZE; +} + +static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm) +{ + struct amdgpu_numa_info *numa_info; + int nid; + + numa_info = xa_load(&numa_info_xa, pxm); + + if (!numa_info) { + struct sysinfo info; + + numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL); + if (!numa_info) + return NULL; + + nid = pxm_to_node(pxm); + numa_info->pxm = pxm; + numa_info->nid = nid; + + if (numa_info->nid == NUMA_NO_NODE) { + si_meminfo(&info); + numa_info->size = info.totalram * info.mem_unit; + } else { + numa_info->size = amdgpu_acpi_get_numa_size(nid); + } + xa_store(&numa_info_xa, numa_info->pxm, numa_info, GFP_KERNEL); + } + + return numa_info; +} +#endif + +/** + * amdgpu_acpi_get_node_id - obtain the NUMA node id for corresponding amdgpu + * acpi device handle + * + * @handle: acpi handle + * @numa_info: amdgpu_numa_info structure holding numa information + * + * Queries the ACPI interface to fetch the corresponding NUMA Node ID for a + * given amdgpu acpi device. + * + * Returns ACPI STATUS OK with Node ID on success or the corresponding failure reason + */ +static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, + struct amdgpu_numa_info **numa_info) +{ +#ifdef CONFIG_ACPI_NUMA + u64 pxm; + acpi_status status; + + if (!numa_info) + return_ACPI_STATUS(AE_ERROR); + + status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); + + if (ACPI_FAILURE(status)) + return status; + + *numa_info = amdgpu_acpi_get_numa_info(pxm); + + if (!*numa_info) + return_ACPI_STATUS(AE_ERROR); + + return_ACPI_STATUS(AE_OK); +#else + return_ACPI_STATUS(AE_NOT_EXIST); +#endif +} + +static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) +{ + struct amdgpu_acpi_dev_info *acpi_dev; + + if (list_empty(&amdgpu_acpi_dev_list)) + return NULL; + + list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list) + if (acpi_dev->bdf == bdf) + return acpi_dev; + + return NULL; +} + +static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, + struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf) +{ + struct amdgpu_acpi_dev_info *tmp; + union acpi_object *obj; + int ret = -ENOENT; + + *dev_info = NULL; + tmp = kzalloc(sizeof(struct amdgpu_acpi_dev_info), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + INIT_LIST_HEAD(&tmp->xcc_list); + INIT_LIST_HEAD(&tmp->list); + tmp->bdf = bdf; + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_SUPP_MODE, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_SUPP_MODE); + ret = -ENOENT; + goto out; + } + + tmp->supp_xcp_mode = obj->integer.value & 0xFFFF; + ACPI_FREE(obj); + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_XCP_MODE, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_XCP_MODE); + ret = -ENOENT; + goto out; + } + + tmp->xcp_mode = obj->integer.value & 0xFFFF; + tmp->mem_mode = (obj->integer.value >> 32) & 0xFFFF; + ACPI_FREE(obj); + + /* Evaluate DSMs and fill XCC information */ + obj = 
acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_TMR_INFO, NULL, + ACPI_TYPE_PACKAGE); + + if (!obj || obj->package.count < 2) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_TMR_INFO); + ret = -ENOENT; + goto out; + } + + tmp->tmr_base = obj->package.elements[0].integer.value; + tmp->tmr_size = obj->package.elements[1].integer.value; + ACPI_FREE(obj); + + DRM_DEBUG_DRIVER( + "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ", + tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, + tmp->tmr_base, tmp->tmr_size); + list_add_tail(&tmp->list, &amdgpu_acpi_dev_list); + *dev_info = tmp; + + return 0; + +out: + if (obj) + ACPI_FREE(obj); + kfree(tmp); + + return ret; +} + +static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, + u16 *bdf) +{ + union acpi_object *obj; + acpi_status status; + int ret = -ENOENT; + + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_NUM_FUNCS, NULL, + ACPI_TYPE_INTEGER); + + if (!obj || obj->integer.value != AMD_XCC_DSM_NUM_FUNCS) + goto out; + ACPI_FREE(obj); + + /* Evaluate DSMs and fill XCC information */ + obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, + AMD_XCC_DSM_GET_VF_XCC_MAPPING, NULL, + ACPI_TYPE_INTEGER); + + if (!obj) { + acpi_handle_debug(xcc_info->handle, + "_DSM function %d evaluation failed", + AMD_XCC_DSM_GET_VF_XCC_MAPPING); + ret = -EINVAL; + goto out; + } + + /* PF xcc id [39:32] */ + xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF; + /* xcp node of this xcc [47:40] */ + xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF; + /* PF bus/dev/fn of this xcc [63:48] */ + *bdf = (obj->integer.value >> 48) & 0xFFFF; + ACPI_FREE(obj); + obj = NULL; + + status = + amdgpu_acpi_get_node_id(xcc_info->handle, &xcc_info->numa_info); + + /* TODO: check if this check is required */ + if (ACPI_SUCCESS(status)) + ret = 0; +out: + if (obj) + ACPI_FREE(obj); + + return ret; +} + +static int amdgpu_acpi_enumerate_xcc(void) +{ + struct amdgpu_acpi_dev_info *dev_info = NULL; + struct amdgpu_acpi_xcc_info *xcc_info; + struct acpi_device *acpi_dev; + char hid[ACPI_ID_LEN]; + int ret, id; + u16 bdf; + + INIT_LIST_HEAD(&amdgpu_acpi_dev_list); + xa_init(&numa_info_xa); + + for (id = 0; id < AMD_XCC_MAX_HID; id++) { + sprintf(hid, "%s%d", "AMD", AMD_XCC_HID_START + id); + acpi_dev = acpi_dev_get_first_match_dev(hid, NULL, -1); + /* These ACPI objects are expected to be in sequential order. If + * one is not found, no need to check the rest. 
+ */ + if (!acpi_dev) { + DRM_DEBUG_DRIVER("No matching acpi device found for %s", + hid); + break; + } + + xcc_info = kzalloc(sizeof(struct amdgpu_acpi_xcc_info), + GFP_KERNEL); + if (!xcc_info) { + DRM_ERROR("Failed to allocate memory for xcc info\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD(&xcc_info->list); + xcc_info->handle = acpi_device_handle(acpi_dev); + acpi_dev_put(acpi_dev); + + ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf); + if (ret) { + kfree(xcc_info); + continue; + } + + dev_info = amdgpu_acpi_get_dev(bdf); + + if (!dev_info) + ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf); + + if (ret == -ENOMEM) + return ret; + + if (!dev_info) { + kfree(xcc_info); + continue; + } + + list_add_tail(&xcc_info->list, &dev_info->xcc_list); + } + + return 0; +} + +int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, + u64 *tmr_size) +{ + struct amdgpu_acpi_dev_info *dev_info; + u16 bdf; + + if (!tmr_offset || !tmr_size) + return -EINVAL; + + bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn; + dev_info = amdgpu_acpi_get_dev(bdf); + if (!dev_info) + return -ENOENT; + + *tmr_offset = dev_info->tmr_base; + *tmr_size = dev_info->tmr_size; + + return 0; +} + +int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, + struct amdgpu_numa_info *numa_info) +{ + struct amdgpu_acpi_dev_info *dev_info; + struct amdgpu_acpi_xcc_info *xcc_info; + u16 bdf; + + if (!numa_info) + return -EINVAL; + + bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn; + dev_info = amdgpu_acpi_get_dev(bdf); + if (!dev_info) + return -ENOENT; + + list_for_each_entry(xcc_info, &dev_info->xcc_list, list) { + if (xcc_info->phy_id == xcc_id) { + memcpy(numa_info, xcc_info->numa_info, + sizeof(*numa_info)); + return 0; + } + } + + return -ENOENT; +} + /** * amdgpu_acpi_event - handle notify events * @@ -1054,6 +1431,36 @@ void amdgpu_acpi_detect(void) } else { atif->backlight_caps.caps_valid = false; } + + amdgpu_acpi_enumerate_xcc(); +} + +void amdgpu_acpi_release(void) +{ + struct amdgpu_acpi_dev_info *dev_info, *dev_tmp; + struct amdgpu_acpi_xcc_info *xcc_info, *xcc_tmp; + struct amdgpu_numa_info *numa_info; + unsigned long index; + + xa_for_each(&numa_info_xa, index, numa_info) { + kfree(numa_info); + xa_erase(&numa_info_xa, index); + } + + if (list_empty(&amdgpu_acpi_dev_list)) + return; + + list_for_each_entry_safe(dev_info, dev_tmp, &amdgpu_acpi_dev_list, + list) { + list_for_each_entry_safe(xcc_info, xcc_tmp, &dev_info->xcc_list, + list) { + list_del(&xcc_info->list); + kfree(xcc_info); + } + + list_del(&dev_info->list); + kfree(dev_info); + } } #if IS_ENABLED(CONFIG_SUSPEND) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0385f7f69278..b4fcad0e62f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -53,7 +53,6 @@ int amdgpu_amdkfd_init(void) amdgpu_amdkfd_total_mem_size *= si.mem_unit; ret = kgd2kfd_init(); - amdgpu_amdkfd_gpuvm_init_mem_limits(); kfd_initialized = !ret; return ret; @@ -143,6 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) int i; int last_valid_bit; + amdgpu_amdkfd_gpuvm_init_mem_limits(); + if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = @@ -162,7 +163,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * clear */ bitmap_complement(gpu_resources.cp_queue_bitmap, - adev->gfx.mec.queue_bitmap, + adev->gfx.mec_bitmap[0].queue_bitmap, KGD_MAX_QUEUES); /* According 
to linux/bitmap.h we shouldn't use bitmap_clear if @@ -427,14 +428,23 @@ uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, } void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, - struct kfd_local_mem_info *mem_info) + struct kfd_local_mem_info *mem_info, + struct amdgpu_xcp *xcp) { memset(mem_info, 0, sizeof(*mem_info)); - mem_info->local_mem_size_public = adev->gmc.visible_vram_size; - mem_info->local_mem_size_private = adev->gmc.real_vram_size - + if (xcp) { + if (adev->gmc.real_vram_size == adev->gmc.visible_vram_size) + mem_info->local_mem_size_public = + KFD_XCP_MEMORY_SIZE(adev, xcp->id); + else + mem_info->local_mem_size_private = + KFD_XCP_MEMORY_SIZE(adev, xcp->id); + } else { + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - adev->gmc.visible_vram_size; - + } mem_info->vram_width = adev->gmc.vram_width; pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", @@ -497,7 +507,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, - uint32_t *flags) + uint32_t *flags, int8_t *xcp_id) { struct dma_buf *dma_buf; struct drm_gem_object *obj; @@ -541,6 +551,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC; } + if (xcp_id) + *xcp_id = bo->xcp_id; out_put: dma_buf_put(dma_buf); @@ -732,17 +744,19 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, if (adev->family == AMDGPU_FAMILY_AI) { int i; - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); } return 0; } int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type) + uint16_t pasid, + enum TLB_FLUSH_TYPE flush_type, + uint32_t inst) { bool all_hub = false; @@ -750,7 +764,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, adev->family == AMDGPU_FAMILY_RV) all_hub = true; - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); } bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) @@ -758,11 +772,32 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) return adev->have_atomics_support; } +void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev) +{ + amdgpu_device_flush_hdp(adev, NULL); +} + void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) { amdgpu_umc_poison_handler(adev, reset); } +int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, + uint32_t *payload) +{ + int ret; + + /* Device or IH ring is not ready so bail. 
*/ + ret = amdgpu_ih_wait_on_checkpoint_process_ts(adev, &adev->irq.ih); + if (ret) + return ret; + + /* Send payload to fence KFD interrupts */ + amdgpu_amdkfd_interrupt(adev, payload); + + return 0; +} + bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) { if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) @@ -770,3 +805,28 @@ bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) else return false; } + +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) +{ + return kgd2kfd_check_and_lock_kfd(); +} + +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) +{ + kgd2kfd_unlock_kfd(); +} + + +u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) +{ + u64 tmp; + s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id); + + if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { + tmp = adev->gmc.mem_partitions[mem_id].size; + do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); + return ALIGN_DOWN(tmp, PAGE_SIZE); + } else { + return adev->gmc.real_vram_size; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 01ba3589b60a..2d0406bff84e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -30,10 +30,12 @@ #include <linux/kthread.h> #include <linux/workqueue.h> #include <linux/mmu_notifier.h> +#include <linux/memremap.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" #include "amdgpu_vm.h" +#include "amdgpu_xcp.h" extern uint64_t amdgpu_amdkfd_total_mem_size; @@ -97,10 +99,13 @@ struct amdgpu_amdkfd_fence { struct amdgpu_kfd_dev { struct kfd_dev *dev; - int64_t vram_used; - uint64_t vram_used_aligned; + int64_t vram_used[MAX_XCP]; + uint64_t vram_used_aligned[MAX_XCP]; bool init_complete; struct work_struct reset_work; + + /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ + struct dev_pagemap pgmap; }; enum kgd_engine_type { @@ -151,6 +156,8 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev); +int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev); +void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev); int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, @@ -160,7 +167,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, uint16_t vmid); int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type, + uint32_t inst); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -224,7 +232,8 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem); uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, enum kgd_engine_type type); void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, - struct kfd_local_mem_info *mem_info); + struct kfd_local_mem_info *mem_info, + struct amdgpu_xcp *xcp); uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); @@ -234,13 +243,15 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, 
struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, - uint32_t *flags); + uint32_t *flags, int8_t *xcp_id); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, struct amdgpu_device *src); int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *src, bool is_min); int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); +int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, + uint32_t *payload); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -279,7 +290,8 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); -size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, + uint8_t xcp_id); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -310,6 +322,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, struct dma_buf **dmabuf); +void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, @@ -319,9 +332,18 @@ void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag); + uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag); + uint64_t size, u32 alloc_flag, int8_t xcp_id); + +u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); + +#define KFD_XCP_MEM_ID(adev, xcp_id) \ + ((adev)->xcp_mgr && (xcp_id) >= 0 ?\ + (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) + +#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id)) + #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); @@ -352,6 +374,17 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) { } #endif + +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) +int kgd2kfd_init_zone_device(struct amdgpu_device *adev); +#else +static inline +int kgd2kfd_init_zone_device(struct amdgpu_device *adev) +{ + return 0; +} +#endif + /* KGD2KFD callbacks */ int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); @@ -372,6 +405,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); +int kgd2kfd_check_and_lock_kfd(void); +void kgd2kfd_unlock_kfd(void); #else static inline int kgd2kfd_init(void) { @@ -437,5 +472,14 @@ static inline void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { } + +static inline int kgd2kfd_check_and_lock_kfd(void) +{ + return 
0; +} + +static inline void kgd2kfd_unlock_kfd(void) +{ +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 4485bb29bec9..60f9e027fb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -23,6 +23,149 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd_arcturus.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "gc/gc_9_4_2_offset.h" +#include "gc/gc_9_4_2_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +/* + * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. + * + * restore_dbg_registers is ignored here but is a general interface requirement + * for devices that support GFXOFF and where the RLC save/restore list + * does not support hw registers for debugging i.e. the driver has to manually + * initialize the debug mode registers after it has disabled GFX off during the + * debug session. + */ +static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION; + + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) + return -EPERM; + + return 0; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_RPLACE. 
*/ +static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) + +{ + uint32_t data = 0; + + *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); + trap_mask_bits = (trap_mask_bits & trap_mask_request) | + (*trap_mask_prev & ~trap_mask_request); + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); + + return data; +} + +static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode); + + return data; +} + +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) +static uint32_t kgd_gfx_aldebaran_set_address_watch( + struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 6); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low); + + return watch_address_cntl; +} + +static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + return 0; +} const struct kfd2kgd_calls aldebaran_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, @@ -42,5 +185,14 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, - .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings + .enable_debug_trap = kgd_aldebaran_enable_debug_trap, + .disable_debug_trap = kgd_aldebaran_disable_debug_trap, + .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, + .set_address_watch = kgd_gfx_aldebaran_set_address_watch, + .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 4191af5a3f13..625db444df1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd_arcturus.h" +#include "amdgpu_reset.h" #include "sdma0/sdma0_4_2_2_offset.h" #include "sdma0/sdma0_4_2_2_sh_mask.h" #include "sdma1/sdma1_4_2_2_offset.h" @@ -48,6 +49,8 @@ #include "amdgpu_amdkfd_gfx_v9.h" #include "gfxhub_v1_0.h" #include "mmhub_v9_4.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -276,6 +279,117 @@ int kgd_arcturus_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, return 0; } +/* + * Helper used to suspend/resume gfx pipe for image post process work to set + * barrier behaviour. + */ +static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool suspend) +{ + int i, r = 0; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + if (!(ring && ring->sched.thread)) + continue; + + /* stop secheduler and drain ring. */ + if (suspend) { + drm_sched_stop(&ring->sched, NULL); + r = amdgpu_fence_wait_empty(ring); + if (r) + goto out; + } else { + drm_sched_start(&ring->sched, false); + } + } + +out: + /* return on resume or failure to drain rings. */ + if (!suspend || r) + return r; + + return amdgpu_device_ip_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GFX); +} + +static void set_barrier_auto_waitcnt(struct amdgpu_device *adev, bool enable_waitcnt) +{ + uint32_t data; + + WRITE_ONCE(adev->barrier_has_auto_waitcnt, enable_waitcnt); + + if (!down_read_trylock(&adev->reset_domain->sem)) + return; + + amdgpu_amdkfd_suspend(adev, false); + + if (suspend_resume_compute_scheduler(adev, true)) + goto out; + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG)); + data = REG_SET_FIELD(data, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, + !enable_waitcnt); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG), data); + +out: + suspend_resume_compute_scheduler(adev, false); + + amdgpu_amdkfd_resume(adev, false); + + up_read(&adev->reset_domain->sem); +} + +/* + * restore_dbg_registers is ignored here but is a general interface requirement + * for devices that support GFXOFF and where the RLC save/restore list + * does not support hw registers for debugging i.e. the driver has to manually + * initialize the debug mode registers after it has disabled GFX off during the + * debug session. + */ +static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + set_barrier_auto_waitcnt(adev, true); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +/* + * keep_trap_enabled is ignored here but is a general interface requirement + * for devices that support multi-process debugging where the performance + * overhead from trap temporary setup needs to be bypassed when the debug + * session has ended. 
+ */ +static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + set_barrier_auto_waitcnt(adev, false); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -294,6 +408,15 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .enable_debug_trap = kgd_arcturus_enable_debug_trap, + .disable_debug_trap = kgd_arcturus_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v9_set_address_watch, + .clear_address_watch = kgd_gfx_v9_clear_address_watch, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c new file mode 100644 index 000000000000..5b4b7f8b92a5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -0,0 +1,384 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_gfx_v9.h" +#include "gc/gc_9_4_3_offset.h" +#include "gc/gc_9_4_3_sh_mask.h" +#include "athub/athub_1_8_0_offset.h" +#include "athub/athub_1_8_0_sh_mask.h" +#include "oss/osssys_4_4_2_offset.h" +#include "oss/osssys_4_4_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "sdma/sdma_4_4_2_offset.h" +#include "sdma/sdma_4_4_2_sh_mask.h" + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t sdma_engine_reg_base = + SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, engine_id), + regSDMA_RLC0_RB_CNTL) - + regSDMA_RLC0_RB_CNTL; + uint32_t retval = sdma_engine_reg_base + + queue_id * (regSDMA_RLC1_RB_CNTL - regSDMA_RLC0_RB_CNTL); + + pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, + queue_id, retval); + return retval; +} + +static int kgd_gfx_v9_4_3_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS); + if (data & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, data); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + uint32_t sdma_rlc_reg_offset = 
get_sdma_rlc_reg_offset(adev, + engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+12) +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = regSDMA_RLC0_RB_CNTL; reg <= regSDMA_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = regSDMA_RLC0_STATUS; reg <= regSDMA_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = regSDMA_RLC0_IB_SUB_REMAIN; + reg <= regSDMA_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + for (reg = regSDMA_RLC0_MIDCMD_DATA0; + reg <= regSDMA_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_rlc_reg_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_gfx_v9_4_3_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) +{ + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_gfx_v9_4_3_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, + unsigned int utimeout) +{ + struct v9_sdma_mqd *m; + uint32_t sdma_rlc_reg_offset; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL); + temp = temp & ~SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_CONTEXT_STATUS); + if (temp & SDMA_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("SDMA RLC not idle in %s\n", __func__); + return -ETIME; + } + usleep_range(500, 1000); + } + + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_DOORBELL, 0); + WREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL, + RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_CNTL) | + SDMA_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = + RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_rlc_reg_offset + regSDMA_RLC0_RB_RPTR_HI); + + return 0; +} + +static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev, + u32 pasid, unsigned int vmid, uint32_t xcc_inst) +{ + unsigned long timeout; + unsigned int reg; + unsigned int phy_inst = GET_INST(GC, xcc_inst); + /* Every two XCCs share one AID */ + unsigned int aid = phy_inst / 2; + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 
0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping); + + timeout = jiffies + msecs_to_jiffies(10); + while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) { + if (time_after(jiffies, timeout)) { + pr_err("Fail to program VMID-PASID mapping\n"); + return -ETIME; + } + cpu_relax(); + } + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + reg = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX)); + /* Every 4 numbers is a cycle. 1st is AID, 2nd and 3rd are XCDs, + * and the 4th is reserved. Therefore "aid * 4 + (xcc_inst % 2) + 1" + * programs _LUT for XCC and "aid * 4" for AID where the XCC connects + * to. + */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), + aid * 4 + (phy_inst % 2) + 1); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid, + pasid_mapping); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), + aid * 4); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), reg); + + return 0; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) +{ + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, hqd_end, data; + + m = get_mqd(mqd); + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + + /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_MQD_BASE_ADDR); + hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); + + for (reg = hqd_base; reg <= hqd_end; reg++) + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), + data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. 
+ */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, + queue_id)); + } + + /* Start the EOP fetcher */ + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + + kgd_gfx_v9_release_queue(adev, inst); + + return 0; +} + +const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_4_3_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, + .hqd_sdma_load = kgd_gfx_v9_4_3_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_gfx_v9_4_3_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_gfx_v9_4_3_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_gfx_v9_4_3_hqd_sdma_destroy, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .set_vm_context_page_table_base = + kgd_gfx_v9_set_vm_context_page_table_base, + .program_trap_handler_settings = + kgd_gfx_v9_program_trap_handler_settings +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 9378fc79e9ea..8ad7a7779e14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -21,6 +21,7 @@ */ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_gfx_v10.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" #include "athub/athub_2_0_0_offset.h" @@ -31,6 +32,7 @@ #include "v10_structs.h" #include "nv.h" #include "nvd.h" +#include <uapi/linux/kfd_ioctl.h> enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -79,7 +81,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -91,7 +93,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi } static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { /* * We have to assume that there is no outstanding 
mapping. @@ -135,7 +137,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, * but still works */ -static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -205,7 +208,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, - uint32_t wptr_mask, struct mm_struct *mm) + uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) { struct v10_compute_mqd *m; uint32_t *mqd_hqd; @@ -286,9 +289,9 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off) + uint32_t doorbell_off, uint32_t inst) { - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; struct v10_compute_mqd *m; uint32_t mec, pipe; int r; @@ -303,7 +306,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); @@ -330,7 +333,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, amdgpu_ring_commit(kiq_ring); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); release_queue(adev); return r; @@ -338,7 +341,7 @@ out_unlock: static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS 56 @@ -469,7 +472,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; @@ -510,7 +513,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { enum hqd_dequeue_request_type type; unsigned long end_jiffies; @@ -673,7 +676,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data = 0; @@ -708,8 +711,295 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +/* + * GFX10 helper for wave launch stall requirements on debug trap setting. + * + * vmid: + * Target VMID to stall/unstall. + * + * stall: + * 0-unstall wave launch (enable), 1-stall wave launch (disable). + * After wavefront launch has been stalled, allocated waves must drain from + * SPI in order for debug trap settings to take effect on those waves. + * This is roughly a ~3500 clock cycle wait on SPI where a read on + * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles. 
+ * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required. + * + * NOTE: We can afford to clear the entire STALL_VMID field on unstall + * because current GFX10 chips cannot support multi-process debugging due to + * trap configuration and masking being limited to global scope. Always + * assume single process conditions. + * + */ + +#define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110 +static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall) +{ + uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + int i; + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID, + stall ? 1 << vmid : 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + if (!stall) + return; + + for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++) + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); +} + +uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + /* assume gfx off is disabled for the debug session if rlc restore not supported. */ + if (restore_dbg_registers) { + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + VMID_SEL, 1 << vmid); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + TRAP_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH; + + /* The SPI_GDBG_TRAP_MASK register is global and affects all + * processes. Only allow OR-ing the address-watch bit, since + * this only affects processes under the debugger. Other bits + * should stay 0 to avoid the debugger interfering with other + * processes. 
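
To make the contract of this helper concrete, here is a hypothetical caller. The wrapper function below is an assumption for illustration and is not part of the patch; the -EINVAL behaviour it mentions is implemented by the check that follows immediately after this sketch.

static int example_query_gfx10_trap_support(struct amdgpu_device *adev)
{
	uint32_t supported = ~0U;	/* pretend the debugger asked for everything */

	/*
	 * Returns 0 and narrows 'supported' down to
	 * KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH; any override type other than
	 * KFD_DBG_TRAP_OVERRIDE_OR is rejected with -EINVAL.
	 */
	return kgd_gfx_v10_validate_trap_override_request(adev,
			KFD_DBG_TRAP_OVERRIDE_OR, &supported);
}
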
+ */ + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR) + return -EINVAL; + + return 0; +} + +uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) +{ + uint32_t data, wave_cntl_prev; + + mutex_lock(&adev->grbm_idx_mutex); + + wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK)); + *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN); + + trap_mask_bits = (trap_mask_bits & trap_mask_request) | + (*trap_mask_prev & ~trap_mask_request); + + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); + + /* We need to preserve wave launch mode stall settings. */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + bool is_mode_set = !!wave_launch_mode; + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + VMID_MASK, is_mode_set ? 1 << vmid : 0); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + MODE, is_mode_set ? wave_launch_mode : 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); + + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H) +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VMID, + debug_vmid); + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 7); + + /* Turning off this watch point until we set all the registers */ + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 0); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low); + + /* Enable the watch point */ + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + return 0; +} + +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + uint32_t watch_address_cntl; + + 
watch_address_cntl = 0; + + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + return 0; +} + + +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. + * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. + */ +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times) + +{ + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + /* + * The CP cannot handle a 0 grace period input and will result in + * an infinite grace period being set, so set it to 1 to prevent this. + */ + if (grace_period == 0) + grace_period = 1; + + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + grace_period); + + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); +} + static void program_trap_handler_settings(struct amdgpu_device *adev, - uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, + uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -750,5 +1040,14 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, + .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v10_set_address_watch, + .clear_address_watch = kgd_gfx_v10_clear_address_watch, + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h new file mode 100644 index 000000000000..e6b70196071a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -0,0 +1,55 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid); +uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid); +int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported); +uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev); +uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid); +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid); +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id); +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index ba21ec6b35e0..8c8437a4383f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -22,6 +22,7 @@ #include <linux/mmu_context.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_gfx_v10.h" #include "gc/gc_10_3_0_offset.h" #include "gc/gc_10_3_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -80,7 +81,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -93,7 +94,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v /* ATC is defeatured on Sienna_Cichlid */ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; @@ -105,7 +106,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int return 0; } -static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id) +static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -177,7 +179,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, - uint32_t wptr_mask, struct mm_struct *mm) + uint32_t wptr_mask, struct mm_struct *mm, uint32_t 
inst) { struct v10_compute_mqd *m; uint32_t *mqd_hqd; @@ -273,9 +275,9 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd, static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off) + uint32_t doorbell_off, uint32_t inst) { - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; struct v10_compute_mqd *m; uint32_t mec, pipe; int r; @@ -290,7 +292,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); @@ -317,7 +319,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd, amdgpu_ring_commit(kiq_ring); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); release_queue(adev); return r; @@ -325,7 +327,7 @@ out_unlock: static int hqd_dump_v10_3(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS 56 @@ -456,7 +458,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev, static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; @@ -498,7 +500,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev, static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { enum hqd_dequeue_request_type type; unsigned long end_jiffies; @@ -586,7 +588,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data = 0; @@ -628,7 +630,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, } static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev, - uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, + uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -652,142 +655,6 @@ static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev, unlock_srbm(adev); } -#if 0 -uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev, - uint32_t trap_debug_wave_launch_mode, - uint32_t vmid) -{ - uint32_t data = 0; - uint32_t orig_wave_cntl_value; - uint32_t orig_stall_vmid; - - mutex_lock(&adev->grbm_idx_mutex); - - orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC, - 0, - mmSPI_GDBG_WAVE_CNTL)); - orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value, - SPI_GDBG_WAVE_CNTL, - STALL_VMID); - - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); - - data = 0; - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid); - - mutex_unlock(&adev->grbm_idx_mutex); - - return 0; -} - -uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev) -{ - mutex_lock(&adev->grbm_idx_mutex); - - 
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); - - mutex_unlock(&adev->grbm_idx_mutex); - - return 0; -} - -uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev, - uint32_t trap_override, - uint32_t trap_mask) -{ - uint32_t data = 0; - - mutex_lock(&adev->grbm_idx_mutex); - - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); - - data = 0; - data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, - EXCP_EN, trap_mask); - data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, - REPLACE, trap_override); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); - - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); - - mutex_unlock(&adev->grbm_idx_mutex); - - return 0; -} - -uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev, - uint8_t wave_launch_mode, - uint32_t vmid) -{ - uint32_t data = 0; - bool is_stall_mode; - bool is_mode_set; - - is_stall_mode = (wave_launch_mode == 4); - is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4); - - mutex_lock(&adev->grbm_idx_mutex); - - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, - VMID_MASK, is_mode_set ? 1 << vmid : 0); - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, - MODE, is_mode_set ? wave_launch_mode : 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); - - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, - STALL_VMID, is_stall_mode ? 1 << vmid : 0); - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, - STALL_RA, is_stall_mode ? 1 : 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); - - mutex_unlock(&adev->grbm_idx_mutex); - - return 0; -} - -/* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values - * The values read are: - * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. - * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. - * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. - * gws_wait_time -- Wait Count for Global Wave Syncs. - * que_sleep_wait_time -- Wait Count for Dequeue Retry. - * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. - * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. - * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 
- */ -void get_iq_wait_times_v10_3(struct amdgpu_device *adev, - uint32_t *wait_times) - -{ - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); -} - -void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev, - uint32_t wait_times, - uint32_t grace_period, - uint32_t *reg_offset, - uint32_t *reg_data) -{ - *reg_data = wait_times; - - *reg_data = REG_SET_FIELD(*reg_data, - CP_IQ_WAIT_TIME2, - SCH_WAVE, - grace_period); - - *reg_offset = mmCP_IQ_WAIT_TIME2; -} -#endif - const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v10_3, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3, @@ -805,12 +672,13 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, -#if 0 - .enable_debug_trap = enable_debug_trap_v10_3, - .disable_debug_trap = disable_debug_trap_v10_3, - .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3, - .set_wave_launch_mode = set_wave_launch_mode_v10_3, - .get_iq_wait_times = get_iq_wait_times_v10_3, - .build_grace_period_packet_info = build_grace_period_packet_info_v10_3, -#endif + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v10_set_address_watch, + .clear_address_watch = kgd_gfx_v10_clear_address_watch }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index 7e80caa05060..91c3574ebed3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -30,6 +30,7 @@ #include "soc15d.h" #include "v11_structs.h" #include "soc21.h" +#include <uapi/linux/kfd_ioctl.h> enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -78,7 +79,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -89,7 +90,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi } static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT; @@ -101,7 +102,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p return 0; } -static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id) +static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -162,7 +164,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd) static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) + struct mm_struct *mm, uint32_t inst) { struct v11_compute_mqd *m; 
uint32_t *mqd_hqd; @@ -258,9 +260,9 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off) + uint32_t doorbell_off, uint32_t inst) { - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; struct v11_compute_mqd *m; uint32_t mec, pipe; int r; @@ -275,7 +277,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); @@ -302,7 +304,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd, amdgpu_ring_commit(kiq_ring); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); release_queue(adev); return r; @@ -310,7 +312,7 @@ out_unlock: static int hqd_dump_v11(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS 56 @@ -445,7 +447,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev, } static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) + uint32_t pipe_id, uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; @@ -486,7 +488,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd) static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { enum hqd_dequeue_request_type type; unsigned long end_jiffies; @@ -571,7 +573,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd, static int wave_control_execute_v11(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data = 0; @@ -606,6 +608,183 @@ static void set_vm_context_page_table_base_v11(struct amdgpu_device *adev, adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); } +/* + * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. + * + * restore_dbg_registers is ignored here but is a general interface requirement + * for devices that support GFXOFF and where the RLC save/restore list + * does not support hw registers for debugging i.e. the driver has to manually + * initialize the debug mode registers after it has disabled GFX off during the + * debug session. + */ +static uint32_t kgd_gfx_v11_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. 
*/ +static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION; + + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4)) + *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) + return -EPERM; + + return 0; +} + +static uint32_t trap_mask_map_sw_to_hw(uint32_t mask) +{ + uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0; + uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0; + uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION); + uint32_t ret; + + ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end); + + return ret; +} + +static uint32_t trap_mask_map_hw_to_sw(uint32_t mask) +{ + uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START; + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + return ret; +} + +/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. 
*/ +static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) +{ + uint32_t data = 0; + + *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev); + + data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request); + data = trap_mask_map_sw_to_hw(data); + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); + + return data; +} + +static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode); + + return data; +} + +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) +static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 7); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low); + + return watch_address_cntl; +} + +static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -622,4 +801,11 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .wave_control_execute = wave_control_execute_v11, .get_atc_vmid_pasid_mapping_info = NULL, .set_vm_context_page_table_base = set_vm_context_page_table_base_v11, + .enable_debug_trap = kgd_gfx_v11_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v11_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v11_set_address_watch, + .clear_address_watch = kgd_gfx_v11_clear_address_watch }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index e83cb1c09610..6bf448ab3dff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -78,7 +78,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct 
amdgpu_device *adev, uint32_t vmi } static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { /* * We have to assume that there is no outstanding mapping. @@ -114,7 +114,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, return 0; } -static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -158,7 +159,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, - uint32_t wptr_mask, struct mm_struct *mm) + uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) { struct cik_mqd *m; uint32_t *mqd_hqd; @@ -202,7 +203,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS (35+4) @@ -318,7 +319,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; @@ -358,7 +359,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t temp; enum hqd_dequeue_request_type type; @@ -494,7 +495,7 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 870f352837fc..cd06e4a6d1da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -72,7 +72,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { lock_srbm(adev, 0, 0, 0, vmid); @@ -85,7 +85,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi } static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { /* * We have to assume that there is no outstanding mapping. 
@@ -109,7 +109,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, return 0; } -static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) +static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -153,7 +154,7 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, - uint32_t wptr_mask, struct mm_struct *mm) + uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst) { struct vi_mqd *m; uint32_t *mqd_hqd; @@ -226,7 +227,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd, static int kgd_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS (54+4) @@ -350,7 +351,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, static bool kgd_hqd_is_occupied(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; @@ -390,7 +391,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t temp; enum hqd_dequeue_request_type type; @@ -540,7 +541,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e92b93557c13..51d93fb13ea3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -38,6 +38,7 @@ #include "soc15d.h" #include "gfx_v9_0.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include <uapi/linux/kfd_ioctl.h> enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -46,29 +47,29 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, - uint32_t queue, uint32_t vmid) +static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid, uint32_t inst) { mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, mec, pipe, queue, vmid); + soc15_grbm_select(adev, mec, pipe, queue, vmid, GET_INST(GC, inst)); } -static void unlock_srbm(struct amdgpu_device *adev) +static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev, uint32_t inst) { - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, - uint32_t queue_id) +void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t queue_id, uint32_t inst) { uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(adev, mec, pipe, queue_id, 0); + kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0, inst); } -static uint64_t get_queue_mask(struct amdgpu_device *adev, 
+uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + @@ -77,28 +78,28 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct amdgpu_device *adev) +void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst) { - unlock_srbm(adev); + kgd_gfx_v9_unlock_srbm(adev, inst); } void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases) + uint32_t sh_mem_bases, uint32_t inst) { - lock_srbm(adev, 0, 0, 0, vmid); + kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(adev); + kgd_gfx_v9_unlock_srbm(adev, inst); } int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, - unsigned int vmid) + unsigned int vmid, uint32_t inst) { /* * We have to assume that there is no outstanding mapping. @@ -156,7 +157,8 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, * but still works */ -int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst) { uint32_t mec; uint32_t pipe; @@ -164,13 +166,13 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(adev, mec, pipe, 0, 0); + kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0, inst); - WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, + WREG32_SOC15(GC, GET_INST(GC, inst), mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(adev); + kgd_gfx_v9_unlock_srbm(adev, inst); return 0; } @@ -220,7 +222,8 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, - uint32_t wptr_mask, struct mm_struct *mm) + uint32_t wptr_mask, struct mm_struct *mm, + uint32_t inst) { struct v9_mqd *m; uint32_t *mqd_hqd; @@ -228,21 +231,22 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, m = get_mqd(mqd); - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; - hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + hqd_base = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR); for (reg = hqd_base; - reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); /* Activate doorbell logic before triggering WPTR poll. 
*/ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), + data); if (wptr) { /* Don't read wptr with get_user because the user @@ -271,43 +275,43 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); - release_queue(adev); + kgd_gfx_v9_release_queue(adev, inst); return 0; } int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off) + uint32_t doorbell_off, uint32_t inst) { - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[inst].ring; struct v9_mqd *m; uint32_t mec, pipe; int r; m = get_mqd(mqd); - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -315,7 +319,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", mec, pipe, queue_id); - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[inst].ring_lock); r = amdgpu_ring_alloc(kiq_ring, 7); if (r) { pr_err("Failed to alloc KIQ (%d).\n", r); @@ -342,15 +346,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, amdgpu_ring_commit(kiq_ring); out_unlock: - spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(adev); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + kgd_gfx_v9_release_queue(adev, inst); return r; } int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs) + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst) { uint32_t i = 0, reg; #define HQD_N_REGS 56 @@ -365,13 +369,13 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, if (*dump == NULL) return -ENOMEM; - acquire_queue(adev, pipe_id, queue_id); + 
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); - for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); - reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + for (reg = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(adev); + kgd_gfx_v9_release_queue(adev, inst); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -481,23 +485,23 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(adev, pipe_id, queue_id); - act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); + act = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8); - if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && - high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) + if (low == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE) && + high == RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(adev); + kgd_gfx_v9_release_queue(adev, inst); return retval; } @@ -522,7 +526,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id) + uint32_t queue_id, uint32_t inst) { enum hqd_dequeue_request_type type; unsigned long end_jiffies; @@ -532,10 +536,10 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst); if (m->cp_hqd_vmid == 0) - WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + WREG32_FIELD15_RLC(GC, GET_INST(GC, inst), RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: @@ -552,22 +556,22 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { - temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); + temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(adev); + kgd_gfx_v9_release_queue(adev, inst); return -ETIME; } usleep_range(500, 1000); } - release_queue(adev); + kgd_gfx_v9_release_queue(adev, inst); return 0; } @@ -624,14 +628,14 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd) + uint32_t sq_cmd, uint32_t inst) { uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); - WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); - WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd); + WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, gfx_index_val); + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_CMD, sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 
INSTANCE_BROADCAST_WRITES, 1); @@ -640,12 +644,271 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); - WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW(GC, GET_INST(GC, inst), mmGRBM_GFX_INDEX, data); mutex_unlock(&adev->grbm_idx_mutex); return 0; } +/* + * GFX9 helper for wave launch stall requirements on debug trap setting. + * + * vmid: + * Target VMID to stall/unstall. + * + * stall: + * 0-unstall wave launch (enable), 1-stall wave launch (disable). + * After wavefront launch has been stalled, allocated waves must drain from + * SPI in order for debug trap settings to take effect on those waves. + * This is roughly a ~96 clock cycle wait on SPI where a read on + * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles. + * KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required. + * + * NOTE: We can afford to clear the entire STALL_VMID field on unstall + * because GFX9.4.1 cannot support multi-process debugging due to trap + * configuration and masking being limited to global scope. Always assume + * single process conditions. + */ +#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY 3 +void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev, + uint32_t vmid, + bool stall) +{ + int i; + uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID, + stall ? 1 << vmid : 0); + else + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, + stall ? 1 : 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); + + if (!stall) + return; + + for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++) + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); +} + +/* + * restore_dbg_registers is ignored here but is a general interface requirement + * for devices that support GFXOFF and where the RLC save/restore list + * does not support hw registers for debugging i.e. the driver has to manually + * initialize the debug mode registers after it has disabled GFX off during the + * debug session. + */ +uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid) +{ + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +/* + * keep_trap_enabled is ignored here but is a general interface requirement + * for devices that support multi-process debugging where the performance + * overhead from trap temporary setup needs to be bypassed when the debug + * session has ended. + */ +uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH; + + /* The SPI_GDBG_TRAP_MASK register is global and affects all + * processes. 
Only allow OR-ing the address-watch bit, since + * this only affects processes under the debugger. Other bits + * should stay 0 to avoid the debugger interfering with other + * processes. + */ + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR) + return -EINVAL; + + return 0; +} + +uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_cntl_prev) +{ + uint32_t data, wave_cntl_prev; + + mutex_lock(&adev->grbm_idx_mutex); + + wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK)); + *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN); + + trap_mask_bits = (trap_mask_bits & trap_mask_request) | + (*trap_mask_prev & ~trap_mask_request); + + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); + + /* We need to preserve wave launch mode stall settings. */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid) +{ + uint32_t data = 0; + bool is_mode_set = !!wave_launch_mode; + + mutex_lock(&adev->grbm_idx_mutex); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true); + + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + VMID_MASK, is_mode_set ? 1 << vmid : 0); + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, + MODE, is_mode_set ? 
wave_launch_mode : 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); + + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false); + + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H) +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VMID, + debug_vmid); + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 6); + + /* Turning off this watch point until we set all the registers */ + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 0); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low); + + /* Enable the watch point */ + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + return 0; +} + +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_cntl); + + return 0; +} + +/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. + * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 
+ */ +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times) + +{ + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -682,10 +945,11 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) * @queue_idx: Index of queue in the queue-map bit-field * @wave_cnt: Output parameter updated with number of waves in flight * @vmid: Output parameter updated with VMID of queue whose wave count - * is being collected + * is being collected + * @inst: xcc's instance number on a multi-XCC setup */ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, - int *wave_cnt, int *vmid) + int *wave_cnt, int *vmid, uint32_t inst) { int pipe_idx; int queue_slot; @@ -700,12 +964,12 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; if (*wave_cnt != 0) - *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) & + *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; } @@ -718,9 +982,10 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * @adev: Handle of device from which to get number of waves in flight * @pasid: Identifies the process for which this query call is invoked * @pasid_wave_cnt: Output parameter updated with number of waves in flight that - * belong to process with given pasid + * belong to process with given pasid * @max_waves_per_cu: Output parameter updated with maximum number of waves - * possible per Compute Unit + * possible per Compute Unit + * @inst: xcc's instance number on a multi-XCC setup * * Note: It's possible that the device has too many queues (oversubscription) * in which case a VMID could be remapped to a different PASID. 
This could lead @@ -756,7 +1021,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * Reading registers referenced above involves programming GRBM appropriately */ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu) + int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) { int qidx; int vmid; @@ -772,13 +1037,13 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0); + soc15_grbm_select(adev, 1, 0, 0, 0, inst); /* * Iterate through the shader engines and arrays of the device * to get number of waves in flight */ - bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap, + bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap, KGD_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; @@ -787,8 +1052,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, for (se_idx = 0; se_idx < se_cnt; se_idx++) { for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); - queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); + amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); + queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); /* * Assumption: queue map encodes following schema: four @@ -808,10 +1073,11 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, continue; /* Get number of waves in flight and aggregate them */ - get_wave_count(adev, qidx, &wave_cnt, &vmid); + get_wave_count(adev, qidx, &wave_cnt, &vmid, + inst); if (wave_cnt != 0) { pasid_tmp = - RREG32(SOC15_REG_OFFSET(OSSSYS, 0, + RREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT) + vmid); if (pasid_tmp == pasid) vmid_wave_cnt += wave_cnt; @@ -820,8 +1086,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - soc15_grbm_select(adev, 0, 0, 0, 0); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); + soc15_grbm_select(adev, 0, 0, 0, 0, inst); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ @@ -830,28 +1096,51 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, adev->gfx.cu_info.max_waves_per_simd; } +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + /* + * The CP cannont handle a 0 grace period input and will result in + * an infinite grace period being set so set to 1 to prevent this. 
+ */ + if (grace_period == 0) + grace_period = 1; + + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + grace_period); + + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); +} + void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, - uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst) { - lock_srbm(adev, 0, 0, 0, vmid); + kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); /* * Program TBA registers */ - WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO, + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO, lower_32_bits(tba_addr >> 8)); - WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI, + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI, upper_32_bits(tba_addr >> 8)); /* * Program TMA registers */ - WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO, + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_LO, lower_32_bits(tma_addr >> 8)); - WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI, + WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TMA_HI, upper_32_bits(tma_addr >> 8)); - unlock_srbm(adev); + kgd_gfx_v9_unlock_srbm(adev, inst); } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { @@ -871,6 +1160,15 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .enable_debug_trap = kgd_gfx_v9_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v9_disable_debug_trap, + .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request, + .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v9_set_address_watch, + .clear_address_watch = kgd_gfx_v9_clear_address_watch, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index c7ed3bc9053c..5f54bff0db49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -20,41 +20,81 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ - - void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); + uint32_t sh_mem_bases, uint32_t inst); int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, - unsigned int vmid); -int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id); + unsigned int vmid, uint32_t inst); +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t inst); int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); + struct mm_struct *mm, uint32_t inst); int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t doorbell_off); + uint32_t doorbell_off, uint32_t inst); int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); + uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst); bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, uint64_t queue_address, uint32_t pipe_id, - uint32_t queue_id); + uint32_t queue_id, uint32_t inst); int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); + uint32_t queue_id, uint32_t inst); int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, - uint32_t sq_cmd); + uint32_t sq_cmd, uint32_t inst); bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); - void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu); + int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, - uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); + uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, + uint32_t inst); +void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t queue_id, uint32_t inst); +uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id); +void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst); +void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev, + uint32_t vmid, + bool stall); +uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid); +uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid); +int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported); +uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid); +uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev); +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t 
debug_vmid); +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id); +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 83a83ced2439..f61527b800e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -35,7 +35,9 @@ #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" +#include "kfd_priv.h" #include "kfd_smi_events.h" +#include <drm/ttm/ttm_tt.h> /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate @@ -110,13 +112,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) struct sysinfo si; uint64_t mem; + if (kfd_mem_limit.max_system_mem_limit) + return; + si_meminfo(&si); mem = si.freeram - si.freehigh; mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); - kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); + kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT; pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), (kfd_mem_limit.max_ttm_mem_limit >> 20)); @@ -148,16 +153,20 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * @size: Size of buffer, in bytes, encapsulated by B0. This should be * equivalent to amdgpu_bo_size(BO) * @alloc_flag: Flag used in allocating a BO as noted above + * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is + * managed as one compute node in driver for app * - * Return: returns -ENOMEM in case of error, ZERO otherwise + * Return: + * returns -ENOMEM in case of error, ZERO otherwise */ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag) + uint64_t size, u32 alloc_flag, int8_t xcp_id) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; + uint64_t vram_size = 0; system_mem_needed = 0; ttm_mem_needed = 0; @@ -172,6 +181,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, * 2M BO chunk. 
*/ vram_needed = size; + /* + * For GFX 9.4.3, get the VRAM size from XCP structs + */ + if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id)) + return -EINVAL; + + vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); + if (adev->gmc.is_app_apu) { + system_mem_needed = size; + ttm_mem_needed = size; + } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = size; } else if (!(alloc_flag & @@ -191,8 +211,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev && adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > + vram_size - reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -202,9 +222,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ WARN_ONCE(vram_needed && !adev, "adev reference can't be null when vram is used"); - if (adev) { - adev->kfd.vram_used += vram_needed; - adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); + if (adev && xcp_id >= 0) { + adev->kfd.vram_used[xcp_id] += vram_needed; + adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ? + vram_needed : + ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -215,7 +237,7 @@ release: } void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag) + uint64_t size, u32 alloc_flag, int8_t xcp_id) { spin_lock(&kfd_mem_limit.mem_limit_lock); @@ -225,9 +247,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { WARN_ONCE(!adev, "adev reference can't be null when alloc mem flags vram is set"); + if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id)) + goto release; + if (adev) { - adev->kfd.vram_used -= size; - adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); + adev->kfd.vram_used[xcp_id] -= size; + if (adev->gmc.is_app_apu) { + adev->kfd.vram_used_aligned[xcp_id] -= size; + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; + } else { + adev->kfd.vram_used_aligned[xcp_id] -= + ALIGN(size, VRAM_AVAILABLITY_ALIGN); + } } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= size; @@ -237,8 +269,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - WARN_ONCE(adev && adev->kfd.vram_used < 0, - "KFD VRAM memory accounting unbalanced"); + WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0, + "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, "KFD TTM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.system_mem_used < 0, @@ -254,14 +286,16 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) u32 alloc_flags = bo->kfd_bo->alloc_flags; u64 size = amdgpu_bo_size(bo); - amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags, + bo->xcp_id); kfree(bo->kfd_bo); } /** - * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information * about USERPTR or DOOREBELL or MMIO BO. 
+ * * @adev: Device for which dmamap BO is being created * @mem: BO of peer device that is being DMA mapped. Provides parameters * in building the dmamap BO @@ -285,7 +319,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev, ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, - ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0); amdgpu_bo_unreserve(mem->bo); @@ -527,6 +561,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) { struct ttm_operation_ctx ctx = {.interruptible = true}; struct amdgpu_bo *bo = attachment->bo_va->base.bo; + int ret; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + return ret; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -659,11 +699,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, static void kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) { - struct ttm_operation_ctx ctx = {.interruptible = true}; - struct amdgpu_bo *bo = attachment->bo_va->base.bo; - - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + /* This is a no-op. We don't want to trigger eviction fences when + * unmapping DMABufs. Therefore the invalidation (moving to system + * domain) is done in kfd_mem_dmamap_dmabuf. + */ } /** @@ -804,7 +843,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev) && + if ((adev != bo_adev && !adev->gmc.is_app_apu) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ -1599,23 +1638,42 @@ out_unlock: return ret; } -size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, + uint8_t xcp_id) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); ssize_t available; + uint64_t vram_available, system_mem_available, ttm_mem_available; spin_lock(&kfd_mem_limit.mem_limit_lock); - available = adev->gmc.real_vram_size - - adev->kfd.vram_used_aligned + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id] - atomic64_read(&adev->vram_pin_size) - reserved_for_pt; + + if (adev->gmc.is_app_apu) { + system_mem_available = no_system_mem_limit ? 
+ kfd_mem_limit.max_system_mem_limit : + kfd_mem_limit.max_system_mem_limit - + kfd_mem_limit.system_mem_used; + + ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit - + kfd_mem_limit.ttm_mem_used; + + available = min3(system_mem_available, ttm_mem_available, + vram_available); + available = ALIGN_DOWN(available, PAGE_SIZE); + } else { + available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN); + } + spin_unlock(&kfd_mem_limit.mem_limit_lock); if (available < 0) available = 0; - return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); + return available; } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( @@ -1624,6 +1682,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1631,6 +1690,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; uint64_t aligned_size; + int8_t xcp_id = -1; u64 alloc_flags; int ret; @@ -1639,9 +1699,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( */ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? + + if (adev->gmc.is_app_apu) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else { + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; + } + xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1693,17 +1761,19 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags, + xcp_id); if (ret) { pr_debug("Insufficient memory\n"); goto err_reserve_limit; } - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", - va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain)); + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n", + va, (*mem)->aql_queue ? size << 1 : size, + domain_string(alloc_domain), xcp_id); ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, - bo_type, NULL, &gobj); + bo_type, NULL, &gobj, xcp_id + 1); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); @@ -1728,6 +1798,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { @@ -1759,7 +1830,7 @@ err_node_allow: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: mutex_destroy(&(*mem)->lock); if (gobj) @@ -1855,11 +1926,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Update the size of the BO being freed if it was allocated from - * VRAM and is not imported. + * VRAM and is not imported. 
For APP APU VRAM allocations are done + * in GTT domain */ if (size) { - if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && - (!is_imported)) + if (!is_imported && + (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || + (adev->gmc.is_app_apu && + mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else *size = 0; @@ -2282,8 +2356,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); @@ -2445,7 +2520,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = -EAGAIN; goto unlock_out; } - mem->invalid = 0; + /* set mem valid if mem has hmm range associated */ + if (mem->range) + mem->invalid = 0; } unlock_out: @@ -2577,8 +2654,15 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, validate_list.head) { - bool valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + bool valid; + + /* keep mem without hmm range at userptr_inval_list */ + if (!mem->range) + continue; + + /* Only check mem with hmm range associated */ + valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); mem->range = NULL; if (!valid) { @@ -2586,7 +2670,12 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); + + if (mem->invalid) { + WARN(1, "Valid BO is marked invalid"); + ret = -EAGAIN; + continue; + } list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ac6fe0ae4609..ef4b9a41f20a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -272,6 +272,7 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, break; case ATOM_DGPU_VRAM_TYPE_HBM2: case ATOM_DGPU_VRAM_TYPE_HBM2E: + case ATOM_DGPU_VRAM_TYPE_HBM3: vram_type = AMDGPU_VRAM_TYPE_HBM; break; case ATOM_DGPU_VRAM_TYPE_GDDR6: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 30c28a69e847..b582b83c4984 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -104,9 +104,8 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev) adev->bios = NULL; vram_base = pci_resource_start(adev->pdev, 0); bios = ioremap_wc(vram_base, size); - if (!bios) { + if (!bios) return false; - } adev->bios = kmalloc(size, GFP_KERNEL); if (!adev->bios) { @@ -133,9 +132,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev) adev->bios = NULL; /* XXX: some cards may return 0 for rom size? 
ddx has a workaround */ bios = pci_map_rom(adev->pdev, &size); - if (!bios) { + if (!bios) return false; - } adev->bios = kzalloc(size, GFP_KERNEL); if (adev->bios == NULL) { @@ -168,9 +166,9 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev) header[AMD_VBIOS_SIGNATURE_END] = 0; if ((!AMD_IS_VALID_VBIOS(header)) || - 0 != memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET], - AMD_VBIOS_SIGNATURE, - strlen(AMD_VBIOS_SIGNATURE))) + memcmp((char *)&header[AMD_VBIOS_SIGNATURE_OFFSET], + AMD_VBIOS_SIGNATURE, + strlen(AMD_VBIOS_SIGNATURE)) != 0) return false; /* valid vbios, go on */ @@ -264,7 +262,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios, status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); if (ACPI_FAILURE(status)) { - printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); + DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); return -ENODEV; } @@ -363,7 +361,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) struct acpi_table_header *hdr; acpi_size tbl_size; UEFI_ACPI_VFCT *vfct; - unsigned offset; + unsigned int offset; if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr))) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 6be30dcb029d..d34037b85cf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -593,11 +593,20 @@ static int amdgpu_connector_set_property(struct drm_connector *connector, switch (val) { default: - case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break; - case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break; - case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break; - case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break; + case DRM_MODE_SCALE_NONE: + rmx_type = RMX_OFF; + break; + case DRM_MODE_SCALE_CENTER: + rmx_type = RMX_CENTER; + break; + case DRM_MODE_SCALE_ASPECT: + rmx_type = RMX_ASPECT; + break; + case DRM_MODE_SCALE_FULLSCREEN: + rmx_type = RMX_FULL; + break; } + if (amdgpu_encoder->rmx_type == rmx_type) return 0; @@ -799,12 +808,21 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector, } switch (value) { - case DRM_MODE_SCALE_NONE: rmx_type = RMX_OFF; break; - case DRM_MODE_SCALE_CENTER: rmx_type = RMX_CENTER; break; - case DRM_MODE_SCALE_ASPECT: rmx_type = RMX_ASPECT; break; + case DRM_MODE_SCALE_NONE: + rmx_type = RMX_OFF; + break; + case DRM_MODE_SCALE_CENTER: + rmx_type = RMX_CENTER; + break; + case DRM_MODE_SCALE_ASPECT: + rmx_type = RMX_ASPECT; + break; default: - case DRM_MODE_SCALE_FULLSCREEN: rmx_type = RMX_FULL; break; + case DRM_MODE_SCALE_FULLSCREEN: + rmx_type = RMX_FULL; + break; } + if (amdgpu_encoder->rmx_type == rmx_type) return 0; @@ -1127,7 +1145,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) /* assume digital unless load detected otherwise */ amdgpu_connector->use_digital = true; lret = encoder_funcs->detect(encoder, connector); - DRM_DEBUG_KMS("load_detect %x returned: %x\n",encoder->encoder_type,lret); + DRM_DEBUG_KMS("load_detect %x returned: %x\n", + encoder->encoder_type, lret); if (lret == connector_status_connected) amdgpu_connector->use_digital = false; } @@ -1991,7 +2010,7 @@ amdgpu_connector_add(struct amdgpu_device *adev, if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) { if (i2c_bus->valid) { connector->polled = DRM_CONNECTOR_POLL_CONNECT | - DRM_CONNECTOR_POLL_DISCONNECT; + DRM_CONNECTOR_POLL_DISCONNECT; 
} } else connector->polled = DRM_CONNECTOR_POLL_HPD; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2eb2c66843a8..d9503882ea97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -112,6 +112,9 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, if (r < 0) return r; + if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type)) + return -EINVAL; + ++(num_ibs[r]); p->gang_leader_idx = r; return 0; @@ -192,7 +195,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, uint64_t *chunk_array_user; uint64_t *chunk_array; uint32_t uf_offset = 0; - unsigned int size; + size_t size; int ret; int i; @@ -285,6 +288,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: break; default: @@ -305,7 +309,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } p->gang_leader = p->jobs[p->gang_leader_idx]; - if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { + if (p->ctx->generation != p->gang_leader->generation) { ret = -ECANCELED; goto free_all_kdata; } @@ -393,7 +397,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, { struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -464,7 +468,7 @@ static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -482,7 +486,7 @@ static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i, r; num_deps = chunk->length_dw * 4 / @@ -502,7 +506,7 @@ static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i; num_deps = chunk->length_dw * 4 / @@ -536,7 +540,7 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, struct amdgpu_cs_chunk *chunk) { struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata; - unsigned num_deps; + unsigned int num_deps; int i; num_deps = chunk->length_dw * 4 / @@ -575,6 +579,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, return 0; } +static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata; + int i; + + if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW) + return -EINVAL; + + for (i = 0; i < p->gang_size; ++i) { + p->jobs[i]->shadow_va = shadow->shadow_va; + p->jobs[i]->csa_va = shadow->csa_va; + p->jobs[i]->gds_va = shadow->gds_va; + p->jobs[i]->init_shadow = + shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; + } + + return 0; +} + static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) { unsigned int ce_preempt = 0, de_preempt = 0; @@ -617,6 +641,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) if (r) return r; break; + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: + r = amdgpu_cs_p2_shadow(p, chunk); 
+ if (r) + return r; + break; } } @@ -729,6 +758,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, if (used_vis_vram < total_vis_vram) { u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + increment_us, us_upper_bound); @@ -1047,9 +1077,8 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, /* the IB should be reserved at this point */ r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { + if (r) return r; - } kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE); @@ -1356,7 +1385,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, /* Cleanup the parser structure */ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { - unsigned i; + unsigned int i; amdgpu_sync_free(&parser->sync); for (i = 0; i < parser->num_post_deps; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index c6d4d41c4393..23d054526e7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -106,3 +106,41 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, ttm_eu_backoff_reservation(&ticket, &list); return 0; } + +int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, + uint64_t csa_addr) +{ + struct ww_acquire_ctx ticket; + struct list_head list; + struct amdgpu_bo_list_entry pd; + struct ttm_validate_buffer csa_tv; + int r; + + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&csa_tv.head); + csa_tv.bo = &bo->tbo; + csa_tv.num_shared = 1; + + list_add(&csa_tv.head, &list); + amdgpu_vm_get_pd_bo(vm, &list, &pd); + + r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); + if (r) { + DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + return r; + } + + r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr); + if (r) { + DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + amdgpu_vm_bo_del(adev, bo_va); + + ttm_eu_backoff_reservation(&ticket, &list); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h index 524b4437a021..7dfc1f2012eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h @@ -34,6 +34,9 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, uint64_t csa_addr, uint32_t size); +int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, + uint64_t csa_addr); void amdgpu_free_static_csa(struct amdgpu_bo **bo); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2139ac12159..0dc9c655c4fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -222,8 +222,19 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio); hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM); - scheds = adev->gpu_sched[hw_ip][hw_prio].sched; - num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; + + if (!(adev)->xcp_mgr) { + scheds = adev->gpu_sched[hw_ip][hw_prio].sched; + num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds; + } else { + struct amdgpu_fpriv 
*fpriv; + + fpriv = container_of(ctx->ctx_mgr, struct amdgpu_fpriv, ctx_mgr); + r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, + &num_scheds, &scheds); + if (r) + goto cleanup_entity; + } /* disable load balance if the hw engine retains context among dependent jobs */ if (hw_ip == AMDGPU_HW_IP_VCN_ENC || @@ -255,7 +266,8 @@ error_free_entity: return r; } -static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) +static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_device *adev, + struct amdgpu_ctx_entity *entity) { ktime_t res = ns_to_ktime(0); int i; @@ -268,6 +280,8 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) dma_fence_put(entity->fences[i]); } + amdgpu_xcp_release_sched(adev, entity); + kfree(entity); return res; } @@ -303,6 +317,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { + struct amdgpu_fpriv *fpriv = filp->driver_priv; u32 current_stable_pstate; int r; @@ -318,7 +333,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->generation = amdgpu_vm_generation(mgr->adev, &fpriv->vm); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; @@ -331,6 +346,7 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, else ctx->stable_pstate = current_stable_pstate; + ctx->ctx_mgr = &(fpriv->ctx_mgr); return 0; } @@ -399,7 +415,7 @@ static void amdgpu_ctx_fini(struct kref *ref) for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) { ktime_t spend; - spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]); + spend = amdgpu_ctx_fini_entity(adev, ctx->entities[i][j]); atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]); } } @@ -416,6 +432,7 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, u32 ring, struct drm_sched_entity **entity) { int r; + struct drm_sched_entity *ctx_entity; if (hw_ip >= AMDGPU_HW_IP_NUM) { DRM_ERROR("unknown HW IP type: %d\n", hw_ip); @@ -439,7 +456,14 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, return r; } - *entity = &ctx->entities[hw_ip][ring]->entity; + ctx_entity = &ctx->entities[hw_ip][ring]->entity; + r = drm_sched_entity_error(ctx_entity); + if (r) { + DRM_DEBUG("error entity %p\n", ctx_entity); + return r; + } + + *entity = ctx_entity; return 0; } @@ -570,12 +594,15 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET; - if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + if (ctx->generation != amdgpu_vm_generation(adev, &fpriv->vm)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; + if (amdgpu_in_reset(adev)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS; + if (adev->ras_enabled && con) { /* Return the cached values in O(1), * and schedule delayed work to cache diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 0fa0e56daf67..85376baaa92f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -47,7 +47,7 @@ struct amdgpu_ctx { 
struct amdgpu_ctx_mgr *mgr; unsigned reset_counter; unsigned reset_counter_query; - uint32_t vram_lost_counter; + uint64_t generation; spinlock_t ring_lock; struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM]; bool preamble_presented; @@ -57,6 +57,7 @@ struct amdgpu_ctx { unsigned long ras_counter_ce; unsigned long ras_counter_ue; uint32_t stable_pstate; + struct amdgpu_ctx_mgr *ctx_mgr; }; struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f60753f97ac5..56e89e76ff17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -56,14 +56,14 @@ * * Bit 62: Indicates a GRBM bank switch is needed * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is - * zero) + * zero) * Bits 24..33: The SE or ME selector if needed * Bits 34..43: The SH (or SA) or PIPE selector if needed * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed * * Bit 23: Indicates that the PM power gating lock should be held - * This is necessary to read registers that might be - * unreliable during a power gating transistion. + * This is necessary to read registers that might be + * unreliable during a power gating transistion. * * The lower bits are the BYTE offset of the register to read. This * allows reading multiple registers in a single call and having @@ -76,7 +76,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, ssize_t result = 0; int r; bool pm_pg_lock, use_bank, use_ring; - unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid; + unsigned int instance_bank, sh_bank, se_bank, me, pipe, queue, vmid; pm_pg_lock = use_bank = use_ring = false; instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0; @@ -136,10 +136,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); + sh_bank, instance_bank, 0); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); + amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid, 0); } if (pm_pg_lock) @@ -169,10 +169,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, end: if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -208,7 +208,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) { struct amdgpu_debugfs_regs2_data *rd; - rd = kzalloc(sizeof *rd, GFP_KERNEL); + rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) return -ENOMEM; rd->adev = file_inode(file)->i_private; @@ -221,6 +221,7 @@ static int amdgpu_debugfs_regs2_open(struct inode *inode, struct file *file) static int amdgpu_debugfs_regs2_release(struct inode *inode, struct file *file) { struct amdgpu_debugfs_regs2_data *rd = file->private_data; + mutex_destroy(&rd->lock); kfree(file->private_data); return 0; @@ -262,14 +263,14 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } mutex_lock(&adev->grbm_idx_mutex); amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se, - rd->id.grbm.sh, - rd->id.grbm.instance); + rd->id.grbm.sh, + rd->id.grbm.instance, 
rd->id.xcc_id); } if (rd->id.use_srbm) { mutex_lock(&adev->srbm_mutex); amdgpu_gfx_select_me_pipe_q(adev, rd->id.srbm.me, rd->id.srbm.pipe, - rd->id.srbm.queue, rd->id.srbm.vmid); + rd->id.srbm.queue, rd->id.srbm.vmid, rd->id.xcc_id); } if (rd->id.pg_lock) @@ -295,12 +296,12 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } end: if (rd->id.use_grbm) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, rd->id.xcc_id); mutex_unlock(&adev->grbm_idx_mutex); } if (rd->id.use_srbm) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, rd->id.xcc_id); mutex_unlock(&adev->srbm_mutex); } @@ -319,18 +320,45 @@ end: static long amdgpu_debugfs_regs2_ioctl(struct file *f, unsigned int cmd, unsigned long data) { struct amdgpu_debugfs_regs2_data *rd = f->private_data; + struct amdgpu_debugfs_regs2_iocdata v1_data; int r; + mutex_lock(&rd->lock); + switch (cmd) { + case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2: + r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata_v2 *)data, + sizeof(rd->id)); + if (r) + r = -EINVAL; + goto done; case AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE: - mutex_lock(&rd->lock); - r = copy_from_user(&rd->id, (struct amdgpu_debugfs_regs2_iocdata *)data, sizeof rd->id); - mutex_unlock(&rd->lock); - return r ? -EINVAL : 0; + r = copy_from_user(&v1_data, (struct amdgpu_debugfs_regs2_iocdata *)data, + sizeof(v1_data)); + if (r) { + r = -EINVAL; + goto done; + } + goto v1_copy; default: - return -EINVAL; - } - return 0; + r = -EINVAL; + goto done; + } + +v1_copy: + rd->id.use_srbm = v1_data.use_srbm; + rd->id.use_grbm = v1_data.use_grbm; + rd->id.pg_lock = v1_data.pg_lock; + rd->id.grbm.se = v1_data.grbm.se; + rd->id.grbm.sh = v1_data.grbm.sh; + rd->id.grbm.instance = v1_data.grbm.instance; + rd->id.srbm.me = v1_data.srbm.me; + rd->id.srbm.pipe = v1_data.srbm.pipe; + rd->id.srbm.queue = v1_data.srbm.queue; + rd->id.xcc_id = 0; +done: + mutex_unlock(&rd->lock); + return r; } static ssize_t amdgpu_debugfs_regs2_read(struct file *f, char __user *buf, size_t size, loff_t *pos) @@ -343,6 +371,136 @@ static ssize_t amdgpu_debugfs_regs2_write(struct file *f, const char __user *buf return amdgpu_debugfs_regs2_op(f, (char __user *)buf, *pos, size, 1); } +static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd; + + rd = kzalloc(sizeof *rd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + rd->adev = file_inode(file)->i_private; + file->private_data = rd; + mutex_init(&rd->lock); + + return 0; +} + +static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file) +{ + struct amdgpu_debugfs_gprwave_data *rd = file->private_data; + mutex_destroy(&rd->lock); + kfree(file->private_data); + return 0; +} + +static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + struct amdgpu_device *adev = rd->adev; + ssize_t result = 0; + int r; + uint32_t *data, x; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + r = amdgpu_virt_enable_access_debugfs(adev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + data = kcalloc(1024, sizeof(*data), GFP_KERNEL); + if (!data) { + 
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + amdgpu_virt_disable_access_debugfs(adev); + return -ENOMEM; + } + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, rd->id.se, rd->id.sh, rd->id.cu, rd->id.xcc_id); + + if (!rd->id.gpr_or_wave) { + x = 0; + if (adev->gfx.funcs->read_wave_data) + adev->gfx.funcs->read_wave_data(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, data, &x); + } else { + x = size >> 2; + if (rd->id.gpr.vpgr_or_sgpr) { + if (adev->gfx.funcs->read_wave_vgprs) + adev->gfx.funcs->read_wave_vgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, rd->id.gpr.thread, *pos, size>>2, data); + } else { + if (adev->gfx.funcs->read_wave_sgprs) + adev->gfx.funcs->read_wave_sgprs(adev, rd->id.xcc_id, rd->id.simd, rd->id.wave, *pos, size>>2, data); + } + } + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, rd->id.xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + if (!x) { + result = -EINVAL; + goto done; + } + + while (size && (*pos < x * 4)) { + uint32_t value; + + value = data[*pos >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto done; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + +done: + amdgpu_virt_disable_access_debugfs(adev); + kfree(data); + return result; +} + +static long amdgpu_debugfs_gprwave_ioctl(struct file *f, unsigned int cmd, unsigned long data) +{ + struct amdgpu_debugfs_gprwave_data *rd = f->private_data; + int r = 0; + + mutex_lock(&rd->lock); + + switch (cmd) { + case AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE: + if (copy_from_user(&rd->id, + (struct amdgpu_debugfs_gprwave_iocdata *)data, + sizeof(rd->id))) + r = -EFAULT; + goto done; + default: + r = -EINVAL; + goto done; + } + +done: + mutex_unlock(&rd->lock); + return r; +} + + + /** * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register @@ -863,7 +1021,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, * The offset being sought changes which wave that the status data * will be returned for. 
The bits are used as follows: * - * Bits 0..6: Byte offset into data + * Bits 0..6: Byte offset into data * Bits 7..14: SE selector * Bits 15..22: SH/SA selector * Bits 23..30: CU/{WGP+SIMD} selector @@ -907,13 +1065,13 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); x = 0; if (adev->gfx.funcs->read_wave_data) - adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); + adev->gfx.funcs->read_wave_data(adev, 0, simd, wave, data, &x); - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -1001,17 +1159,17 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, /* switch to the specific se/sh/cu */ mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); + amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0); if (bank == 0) { if (adev->gfx.funcs->read_wave_vgprs) - adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); + adev->gfx.funcs->read_wave_vgprs(adev, 0, simd, wave, thread, offset, size>>2, data); } else { if (adev->gfx.funcs->read_wave_sgprs) - adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); + adev->gfx.funcs->read_wave_sgprs(adev, 0, simd, wave, offset, size>>2, data); } - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0); mutex_unlock(&adev->grbm_idx_mutex); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -1339,6 +1497,15 @@ static const struct file_operations amdgpu_debugfs_regs2_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gprwave_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = amdgpu_debugfs_gprwave_ioctl, + .read = amdgpu_debugfs_gprwave_read, + .open = amdgpu_debugfs_gprwave_open, + .release = amdgpu_debugfs_gprwave_release, + .llseek = default_llseek +}; + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, @@ -1416,6 +1583,7 @@ static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, + &amdgpu_debugfs_gprwave_fops, &amdgpu_debugfs_regs_didt_fops, &amdgpu_debugfs_regs_pcie_fops, &amdgpu_debugfs_regs_smc_fops, @@ -1429,9 +1597,10 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gfxoff_residency_fops, }; -static const char *debugfs_regs_names[] = { +static const char * const debugfs_regs_names[] = { "amdgpu_regs", "amdgpu_regs2", + "amdgpu_gprwave", "amdgpu_regs_didt", "amdgpu_regs_pcie", "amdgpu_regs_smc", @@ -1447,7 +1616,7 @@ static const char *debugfs_regs_names[] = { /** * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide - * register access. + * register access. 
* * @adev: The device to attach the debugfs entries to */ @@ -1459,7 +1628,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | S_IRUGO, root, + S_IFREG | 0444, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); @@ -1470,7 +1639,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; struct drm_device *dev = adev_to_drm(adev); int r = 0, i; @@ -1494,12 +1663,12 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) kthread_park(ring->sched.thread); } - seq_printf(m, "run ib test:\n"); + seq_puts(m, "run ib test:\n"); r = amdgpu_ib_ring_tests(adev); if (r) seq_printf(m, "ib ring tests failed (%d).\n", r); else - seq_printf(m, "ib ring tests passed.\n"); + seq_puts(m, "ib ring tests passed.\n"); /* go on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { @@ -1581,7 +1750,7 @@ static int amdgpu_debugfs_benchmark(void *data, u64 val) static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; struct drm_device *dev = adev_to_drm(adev); struct drm_file *file; int r; @@ -1978,7 +2147,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_ring_init(adev, ring); } - for ( i = 0; i < adev->vcn.num_vcn_inst; i++) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (!amdgpu_vcnfw_log) break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5c7d40873ee2..e25f085ee886 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -707,6 +707,48 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, return r; } +u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, + u64 reg_addr) +{ + unsigned long flags, pcie_index, pcie_index_hi, pcie_data; + u32 r; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if (adev->nbio.funcs->get_pcie_index_hi_offset) + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + else + pcie_index_hi = 0; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = (void __iomem *)adev->rmmio + + pcie_index_hi * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + r = readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return r; +} + /** * amdgpu_device_indirect_rreg64 - read a 64bits indirect register * @@ -747,8 +789,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, * amdgpu_device_indirect_wreg - write an indirect register address * * @adev: 
amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * @@ -774,12 +814,50 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, + u64 reg_addr, u32 reg_data) +{ + unsigned long flags, pcie_index, pcie_index_hi, pcie_data; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if (adev->nbio.funcs->get_pcie_index_hi_offset) + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + else + pcie_index_hi = 0; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = (void __iomem *)adev->rmmio + + pcie_index_hi * 4; + + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel(reg_data, pcie_data_offset); + readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + /** * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address * * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset * @reg_addr: indirect register offset * @reg_data: indirect register data * @@ -840,6 +918,13 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) return 0; } +static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) +{ + DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + BUG(); + return 0; +} + /** * amdgpu_invalid_wreg - dummy reg write function * @@ -857,6 +942,13 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32 BUG(); } +static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) +{ + DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", + reg, v); + BUG(); +} + /** * amdgpu_invalid_rreg64 - dummy 64 bit reg read function * @@ -942,7 +1034,8 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { amdgpu_asic_pre_asic_init(adev); - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) || + adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) return amdgpu_atomfirmware_asic_init(adev, true); else return amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -998,7 +1091,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, if (array_size % 3) return; - for (i = 0; i < array_size; i +=3) { + for (i = 0; i < array_size; i += 3) { reg = registers[i + 0]; and_mask = registers[i + 1]; or_mask = registers[i + 2]; @@ -1090,7 +1183,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) * doorbells are in the first page. 
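The _ext indirect accessors added above extend the PCIE index/data window to 64-bit register addresses: only bits 32..39 of the address are written to the optional high-index register, and that register is cleared again after the access, presumably so the plain 32-bit accessors that never touch it keep addressing the intended window. A trimmed-down sketch of the read sequence, with the locking and the nbio offset lookups from the hunk omitted and illustrative parameter names:

static u32 indirect_rreg_ext_sketch(void __iomem *idx, void __iomem *idx_hi,
                                    void __iomem *data, u64 reg_addr)
{
        u32 val;

        writel(lower_32_bits(reg_addr), idx);
        readl(idx);                             /* post the index write */
        if (idx_hi) {
                writel(upper_32_bits(reg_addr) & 0xff, idx_hi);
                readl(idx_hi);
        }
        val = readl(data);
        if (idx_hi) {                           /* restore the 32-bit view */
                writel(0, idx_hi);
                readl(idx_hi);
        }
        return val;
}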
So with paging queue enabled, * the max num_kernel_doorbells should + 1 page (0x400 in dword) */ - if (adev->asic_type >= CHIP_VEGA10) + if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) && + adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) adev->doorbell.num_kernel_doorbells += 0x400; } @@ -1291,6 +1385,15 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) return 0; } +static bool amdgpu_device_read_bios(struct amdgpu_device *adev) +{ + if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) { + return false; + } + + return true; +} + /* * GPU helpers function. */ @@ -1310,6 +1413,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return false; + if (!amdgpu_device_read_bios(adev)) + return false; + if (amdgpu_passthrough(adev)) { /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot * some old smc fw still need driver do vPost otherwise gpu hang, while @@ -1547,7 +1653,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); amdgpu_sched_jobs = 4; - } else if (!is_power_of_2(amdgpu_sched_jobs)){ + } else if (!is_power_of_2(amdgpu_sched_jobs)) { dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", amdgpu_sched_jobs); amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); @@ -2194,7 +2300,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) total = true; for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_ERROR("disabled ip block: %d <%s>\n", + DRM_WARN("disabled ip block: %d <%s>\n", i, adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else { @@ -2220,14 +2326,16 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return r; /* Read BIOS */ - if (!amdgpu_get_bios(adev)) - return -EINVAL; + if (amdgpu_device_read_bios(adev)) { + if (!amdgpu_get_bios(adev)) + return -EINVAL; - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } } /*get pf2vf msg info at it's earliest time*/ @@ -2376,6 +2484,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } } + amdgpu_xcp_update_partition_sched_list(adev); + return 0; } @@ -2533,8 +2643,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) goto init_failed; /* Don't init kfd if whole hive need to be reset during init */ - if (!adev->gmc.xgmi.pending_reset) + if (!adev->gmc.xgmi.pending_reset) { + kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); + } amdgpu_fru_get_product_info(adev); @@ -2759,8 +2871,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ - if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)|| - adev->asic_type == CHIP_ALDEBARAN )) + if (amdgpu_passthrough(adev) && + ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) || + adev->asic_type == CHIP_ALDEBARAN)) amdgpu_dpm_handle_passthrough_sbr(adev, true); if 
(adev->gmc.xgmi.num_physical_nodes > 1) { @@ -3089,7 +3202,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) } adev->ip_blocks[i].status.hw = false; /* handle putting the SMC in the appropriate state */ - if(!amdgpu_sriov_vf(adev)){ + if (!amdgpu_sriov_vf(adev)) { if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); if (r) { @@ -3608,6 +3721,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->smc_wreg = &amdgpu_invalid_wreg; adev->pcie_rreg = &amdgpu_invalid_rreg; adev->pcie_wreg = &amdgpu_invalid_wreg; + adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; + adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_wreg = &amdgpu_invalid_wreg; adev->pcie_rreg64 = &amdgpu_invalid_rreg64; @@ -3633,6 +3748,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->srbm_mutex); mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->gfx.gfx_off_mutex); + mutex_init(&adev->gfx.partition_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); @@ -3708,8 +3824,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - amdgpu_device_get_pcie_info(adev); - if (amdgpu_mcbp) DRM_INFO("MCBP is enabled\n"); @@ -3725,6 +3839,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); + amdgpu_device_get_pcie_info(adev); + r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); @@ -3753,21 +3869,24 @@ int amdgpu_device_init(struct amdgpu_device *adev, } /* enable PCIE atomic ops */ - if (amdgpu_sriov_vf(adev)) - adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) - adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == - (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (amdgpu_sriov_vf(adev)) { + if (adev->virt.fw_reserve.p_pf2vf) + adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) + adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == + (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a * internal path natively support atomics, set have_atomics_support to true. */ - else if ((adev->flags & AMD_IS_APU) && - (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) + } else if ((adev->flags & AMD_IS_APU) && + (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) { adev->have_atomics_support = true; - else + } else { adev->have_atomics_support = !pci_enable_atomic_ops_to_root(adev->pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); + } + if (!adev->have_atomics_support) dev_info(adev->dev, "PCIE atomic ops is not supported\n"); @@ -3783,7 +3902,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_reset_init(adev); /* detect if we are with an SRIOV vbios */ - amdgpu_device_detect_sriov_bios(adev); + if (adev->bios) + amdgpu_device_detect_sriov_bios(adev); /* check if we need to reset the asic * E.g., driver was not cleanly unloaded previously, etc. 
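The reworked PCIe-atomics probe above also stops dereferencing the pf2vf page before it is mapped. Restated with the intent of each branch spelled out; the pf2vf flag comparison is folded into a placeholder helper here and is not a real function in the driver:

        if (amdgpu_sriov_vf(adev)) {
                /* Trust the host-reported capability, but only once the
                 * pf2vf page has actually been set up. */
                if (adev->virt.fw_reserve.p_pf2vf)
                        adev->have_atomics_support =
                                pf2vf_reports_comp32_and_comp64(adev); /* placeholder */
        } else if ((adev->flags & AMD_IS_APU) &&
                   (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) {
                /* gfx9+ APUs reach memory over an internal path that is
                 * natively atomic and do not depend on PCIe atomics. */
                adev->have_atomics_support = true;
        } else {
                /* Discrete parts: require 32- and 64-bit AtomicOp completer
                 * support at the root port. */
                adev->have_atomics_support =
                        !pci_enable_atomic_ops_to_root(adev->pdev,
                                                       PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
                                                       PCI_EXP_DEVCAP2_ATOMIC_COMP64);
        }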
@@ -3835,25 +3955,27 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } - if (adev->is_atom_fw) { - /* Initialize clocks */ - r = amdgpu_atomfirmware_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); - goto failed; - } - } else { - /* Initialize clocks */ - r = amdgpu_atombios_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); - goto failed; + if (adev->bios) { + if (adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atomfirmware_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + } else { + /* Initialize clocks */ + r = amdgpu_atombios_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + /* init i2c buses */ + if (!amdgpu_device_has_dc_support(adev)) + amdgpu_atombios_i2c_init(adev); } - /* init i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_atombios_i2c_init(adev); } fence_driver_init: @@ -4019,7 +4141,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = NULL; /* Memory manager related */ - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { arch_phys_wc_del(adev->gmc.vram_mtrr); arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } @@ -4049,7 +4171,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* disable all interrupts */ amdgpu_irq_disable_all(adev); - if (adev->mode_info.mode_config_initialized){ + if (adev->mode_info.mode_config_initialized) { if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) drm_helper_force_disable_all(adev_to_drm(adev)); else @@ -4714,42 +4836,42 @@ disabled: int amdgpu_device_mode1_reset(struct amdgpu_device *adev) { - u32 i; - int ret = 0; + u32 i; + int ret = 0; - amdgpu_atombios_scratch_regs_engine_hung(adev, true); + amdgpu_atombios_scratch_regs_engine_hung(adev, true); - dev_info(adev->dev, "GPU mode1 reset\n"); + dev_info(adev->dev, "GPU mode1 reset\n"); - /* disable BM */ - pci_clear_master(adev->pdev); + /* disable BM */ + pci_clear_master(adev->pdev); - amdgpu_device_cache_pci_state(adev->pdev); + amdgpu_device_cache_pci_state(adev->pdev); - if (amdgpu_dpm_is_mode1_reset_supported(adev)) { - dev_info(adev->dev, "GPU smu mode1 reset\n"); - ret = amdgpu_dpm_mode1_reset(adev); - } else { - dev_info(adev->dev, "GPU psp mode1 reset\n"); - ret = psp_gpu_reset(adev); - } + if (amdgpu_dpm_is_mode1_reset_supported(adev)) { + dev_info(adev->dev, "GPU smu mode1 reset\n"); + ret = amdgpu_dpm_mode1_reset(adev); + } else { + dev_info(adev->dev, "GPU psp mode1 reset\n"); + ret = psp_gpu_reset(adev); + } - if (ret) - dev_err(adev->dev, "GPU mode1 reset failed\n"); + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); - amdgpu_device_load_pci_state(adev->pdev); + amdgpu_device_load_pci_state(adev->pdev); - /* wait for asic to come out of reset */ - for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = adev->nbio.funcs->get_memsize(adev); + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = 
adev->nbio.funcs->get_memsize(adev); - if (memsize != 0xffffffff) - break; - udelay(1); - } + if (memsize != 0xffffffff) + break; + udelay(1); + } - amdgpu_atombios_scratch_regs_engine_hung(adev, false); - return ret; + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + return ret; } int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, @@ -5478,7 +5600,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; /* covers APUs as well */ - if (pci_is_root_bus(adev->pdev->bus)) { + if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) { if (adev->pm.pcie_gen_mask == 0) adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; if (adev->pm.pcie_mlw_mask == 0) @@ -5959,6 +6081,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev) struct pci_dev *pdev = adev->pdev; struct drm_device *ddev = adev_to_drm(adev); + amdgpu_xcp_dev_unplug(adev); drm_dev_unplug(ddev); amdgpu_irq_disable_all(adev); @@ -6079,3 +6202,31 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) return true; } } + +uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, + uint32_t inst, uint32_t reg_addr, char reg_name[], + uint32_t expected_value, uint32_t mask) +{ + uint32_t ret = 0; + uint32_t old_ = 0; + uint32_t tmp_ = RREG32(reg_addr); + uint32_t loop = adev->usec_timeout; + + while ((tmp_ & (mask)) != (expected_value)) { + if (old_ != tmp_) { + loop = adev->usec_timeout; + old_ = tmp_; + } else + udelay(1); + tmp_ = RREG32(reg_addr); + loop--; + if (!loop) { + DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", + inst, reg_name, (uint32_t)expected_value, + (uint32_t)(tmp_ & (mask))); + ret = -ETIMEDOUT; + break; + } + } + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0ecce0b92b82..8e1cfc87122d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -30,6 +30,7 @@ #include "soc15.h" #include "gfx_v9_0.h" +#include "gfx_v9_4_3.h" #include "gmc_v9_0.h" #include "df_v1_7.h" #include "df_v3_6.h" @@ -76,12 +77,15 @@ #include "jpeg_v3_0.h" #include "vcn_v4_0.h" #include "jpeg_v4_0.h" +#include "vcn_v4_0_3.h" +#include "jpeg_v4_0_3.h" #include "amdgpu_vkms.h" #include "mes_v10_1.h" #include "mes_v11_0.h" #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" #include "smuio_v13_0.h" +#include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" @@ -200,14 +204,44 @@ static int hw_id_map[MAX_HWIP] = { [PCIE_HWIP] = PCIE_HWID, }; -static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary) +{ + u64 tmr_offset, tmr_size, pos; + void *discv_regn; + int ret; + + ret = amdgpu_acpi_get_tmr_info(adev, &tmr_offset, &tmr_size); + if (ret) + return ret; + + pos = tmr_offset + tmr_size - DISCOVERY_TMR_OFFSET; + + /* This region is read-only and reserved from system use */ + discv_regn = memremap(pos, adev->mman.discovery_tmr_size, MEMREMAP_WC); + if (discv_regn) { + memcpy(binary, discv_regn, adev->mman.discovery_tmr_size); + memunmap(discv_regn); + return 0; + } + + return -ENOENT; +} + +static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, + uint8_t *binary) { uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; 
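amdgpu_device_wait_on_rreg(), added at the end of the amdgpu_device.c hunk above, polls a register until (value & mask) == expected_value and restarts its timeout whenever the value is still changing. A hypothetical call site; the register offset, mask and name below are placeholders, not taken from the driver:

        int r;

        r = amdgpu_device_wait_on_rreg(adev, 0 /* inst */,
                                       reg_example_status,         /* placeholder offset */
                                       "EXAMPLE_STATUS",
                                       EXAMPLE_STATUS__IDLE_MASK,  /* expected_value */
                                       EXAMPLE_STATUS__IDLE_MASK); /* mask */
        if (r)
                dev_err(adev->dev, "block did not report idle\n");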
+ int ret = 0; - amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, - adev->mman.discovery_tmr_size, false); - return 0; + if (vram_size) { + uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; + amdgpu_device_vram_access(adev, pos, (uint32_t *)binary, + adev->mman.discovery_tmr_size, false); + } else { + ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary); + } + + return ret; } static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) @@ -280,6 +314,7 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev) case 0xCF: case 0xDF: adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; + adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN1; break; default: break; @@ -301,33 +336,30 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) if (!adev->mman.discovery_bin) return -ENOMEM; - r = amdgpu_discovery_read_binary_from_vram(adev, adev->mman.discovery_bin); - if (r) { - dev_err(adev->dev, "failed to read ip discovery binary from vram\n"); - r = -EINVAL; - goto out; - } - - if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) { - /* ignore the discovery binary from vram if discovery=2 in kernel module parameter */ - if (amdgpu_discovery == 2) - dev_info(adev->dev,"force read ip discovery binary from file"); - else - dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); - - /* retry read ip discovery binary from file */ + /* Read from file if it is the preferred option */ + if (amdgpu_discovery == 2) { + dev_info(adev->dev, "use ip discovery information from file"); r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); + if (r) { dev_err(adev->dev, "failed to read ip discovery binary from file\n"); r = -EINVAL; goto out; } - /* check the ip discovery binary signature */ - if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { - dev_warn(adev->dev, "get invalid ip discovery binary signature from file\n"); - r = -EINVAL; + + } else { + r = amdgpu_discovery_read_binary_from_mem( + adev, adev->mman.discovery_bin); + if (r) goto out; - } + } + + /* check the ip discovery binary signature */ + if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + dev_err(adev->dev, + "get invalid ip discovery binary signature\n"); + r = -EINVAL; + goto out; } bhdr = (struct binary_header *)adev->mman.discovery_bin; @@ -471,11 +503,11 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev) adev->mman.discovery_bin = NULL; } -static int amdgpu_discovery_validate_ip(const struct ip *ip) +static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip) { - if (ip->number_instance >= HWIP_MAX_INSTANCE) { - DRM_ERROR("Unexpected number_instance (%d) from ip discovery blob\n", - ip->number_instance); + if (ip->instance_number >= HWIP_MAX_INSTANCE) { + DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n", + ip->instance_number); return -EINVAL; } if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) { @@ -493,7 +525,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; - struct ip *ip; + struct ip_v4 *ip; uint16_t die_offset, ip_offset, num_dies, num_ips; int i, j; @@ -510,29 +542,41 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, ip_offset = die_offset + sizeof(*dhdr); for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); + 
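                /* Illustrative summary of the loop below, comment only: each ip_v4
                 * entry in the per-die list is validated first, entries whose
                 * variant field is 1 are treated as harvested, and the added
                 * lines keep the vcn/jpeg inst_mask consistent with
                 * harvest_config so later code can simply test the masks. */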
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); if (amdgpu_discovery_validate_ip(ip)) goto next_ip; - if (le16_to_cpu(ip->harvest) == 1) { + if (le16_to_cpu(ip->variant) == 1) { switch (le16_to_cpu(ip->hw_id)) { case VCN_HWID: (*vcn_harvest_count)++; - if (ip->number_instance == 0) + if (ip->instance_number == 0) { adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; - else + adev->vcn.inst_mask &= + ~AMDGPU_VCN_HARVEST_VCN0; + adev->jpeg.inst_mask &= + ~AMDGPU_VCN_HARVEST_VCN0; + } else { adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; + adev->vcn.inst_mask &= + ~AMDGPU_VCN_HARVEST_VCN1; + adev->jpeg.inst_mask &= + ~AMDGPU_VCN_HARVEST_VCN1; + } break; case DMU_HWID: adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; break; default: break; - } - } + } + } next_ip: - ip_offset += struct_size(ip, base_address, ip->num_base_address); + if (ihdr->base_addr_64_bit) + ip_offset += struct_size(ip, base_address_64, ip->num_base_address); + else + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } } @@ -564,10 +608,15 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, switch (le16_to_cpu(harvest_info->list[i].hw_id)) { case VCN_HWID: (*vcn_harvest_count)++; - if (harvest_info->list[i].number_instance == 0) - adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; - else - adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; + adev->vcn.harvest_config |= + (1 << harvest_info->list[i].number_instance); + adev->jpeg.harvest_config |= + (1 << harvest_info->list[i].number_instance); + + adev->vcn.inst_mask &= + ~(1U << harvest_info->list[i].number_instance); + adev->jpeg.inst_mask &= + ~(1U << harvest_info->list[i].number_instance); break; case DMU_HWID: adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; @@ -577,6 +626,14 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev, 1 << (le16_to_cpu(harvest_info->list[i].number_instance)); (*umc_harvest_count)++; break; + case GC_HWID: + adev->gfx.xcc_mask &= + ~(1U << harvest_info->list[i].number_instance); + break; + case SDMA0_HWID: + adev->sdma.sdma_mask &= + ~(1U << harvest_info->list[i].number_instance); + break; default: break; } @@ -836,9 +893,40 @@ static void ip_disc_release(struct kobject *kobj) kfree(ip_top); } +static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev, + uint16_t hw_id, uint8_t inst) +{ + uint8_t harvest = 0; + + /* Until a uniform way is figured, get mask based on hwid */ + switch (hw_id) { + case VCN_HWID: + harvest = ((1 << inst) & adev->vcn.inst_mask) == 0; + break; + case DMU_HWID: + if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK) + harvest = 0x1; + break; + case UMC_HWID: + /* TODO: It needs another parsing; for now, ignore.*/ + break; + case GC_HWID: + harvest = ((1 << inst) & adev->gfx.xcc_mask) == 0; + break; + case SDMA0_HWID: + harvest = ((1 << inst) & adev->sdma.sdma_mask) == 0; + break; + default: + break; + } + + return harvest; +} + static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, struct ip_die_entry *ip_die_entry, - const size_t _ip_offset, const int num_ips) + const size_t _ip_offset, const int num_ips, + bool reg_base_64) { int ii, jj, kk, res; @@ -852,10 +940,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, size_t ip_offset = _ip_offset; for (jj = 0; jj < num_ips; jj++) { - struct ip *ip; + struct ip_v4 *ip; struct ip_hw_instance *ip_hw_instance; - ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); + ip = (struct ip_v4 *)(adev->mman.discovery_bin + 
ip_offset); if (amdgpu_discovery_validate_ip(ip) || le16_to_cpu(ip->hw_id) != ii) goto next_ip; @@ -903,22 +991,35 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, return -ENOMEM; } ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */ - ip_hw_instance->num_instance = ip->number_instance; + ip_hw_instance->num_instance = ip->instance_number; ip_hw_instance->major = ip->major; ip_hw_instance->minor = ip->minor; ip_hw_instance->revision = ip->revision; - ip_hw_instance->harvest = ip->harvest; + ip_hw_instance->harvest = + amdgpu_discovery_get_harvest_info( + adev, ip_hw_instance->hw_id, + ip_hw_instance->num_instance); ip_hw_instance->num_base_addresses = ip->num_base_address; - for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) - ip_hw_instance->base_addr[kk] = ip->base_address[kk]; + for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) { + if (reg_base_64) + ip_hw_instance->base_addr[kk] = + lower_32_bits(le64_to_cpu(ip->base_address_64[kk])) & 0x3FFFFFFF; + else + ip_hw_instance->base_addr[kk] = ip->base_address[kk]; + } kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype); ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset; res = kobject_add(&ip_hw_instance->kobj, NULL, "%d", ip_hw_instance->num_instance); next_ip: - ip_offset += struct_size(ip, base_address, ip->num_base_address); + if (reg_base_64) + ip_offset += struct_size(ip, base_address_64, + ip->num_base_address); + else + ip_offset += struct_size(ip, base_address, + ip->num_base_address); } } @@ -972,7 +1073,7 @@ static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) return res; } - amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips); + amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips, !!ihdr->base_addr_64_bit); } return 0; @@ -983,6 +1084,9 @@ static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev) struct kset *die_kset; int res, ii; + if (!adev->mman.discovery_bin) + return -EINVAL; + adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL); if (!adev->ip_top) return -ENOMEM; @@ -1082,7 +1186,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; - struct ip *ip; + struct ip_v4 *ip; uint16_t die_offset; uint16_t ip_offset; uint16_t num_dies; @@ -1098,6 +1202,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return r; } + adev->gfx.xcc_mask = 0; + adev->sdma.sdma_mask = 0; + adev->vcn.inst_mask = 0; + adev->jpeg.inst_mask = 0; bhdr = (struct binary_header *)adev->mman.discovery_bin; ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); @@ -1121,7 +1229,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) le16_to_cpu(dhdr->die_id), num_ips); for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); + ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); if (amdgpu_discovery_validate_ip(ip)) goto next_ip; @@ -1131,7 +1239,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", hw_id_names[le16_to_cpu(ip->hw_id)], le16_to_cpu(ip->hw_id), - ip->number_instance, + ip->instance_number, ip->major, ip->minor, ip->revision); @@ -1145,23 +1253,33 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = ip->revision & 0xc0; ip->revision &= ~0xc0; - if 
(adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) + if (adev->vcn.num_vcn_inst < + AMDGPU_MAX_VCN_INSTANCES) { adev->vcn.num_vcn_inst++; - else + adev->vcn.inst_mask |= + (1U << ip->instance_number); + adev->jpeg.inst_mask |= + (1U << ip->instance_number); + } else { dev_err(adev->dev, "Too many VCN instances: %d vs %d\n", adev->vcn.num_vcn_inst + 1, AMDGPU_MAX_VCN_INSTANCES); + } } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || le16_to_cpu(ip->hw_id) == SDMA2_HWID || le16_to_cpu(ip->hw_id) == SDMA3_HWID) { - if (adev->sdma.num_instances < AMDGPU_MAX_SDMA_INSTANCES) + if (adev->sdma.num_instances < + AMDGPU_MAX_SDMA_INSTANCES) { adev->sdma.num_instances++; - else + adev->sdma.sdma_mask |= + (1U << ip->instance_number); + } else { dev_err(adev->dev, "Too many SDMA instances: %d vs %d\n", adev->sdma.num_instances + 1, AMDGPU_MAX_SDMA_INSTANCES); + } } if (le16_to_cpu(ip->hw_id) == UMC_HWID) { @@ -1169,20 +1287,38 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->umc.node_inst_num++; } + if (le16_to_cpu(ip->hw_id) == GC_HWID) + adev->gfx.xcc_mask |= + (1U << ip->instance_number); + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, * so that we don't need to convert them when accessing adev->reg_offset. */ - ip->base_address[k] = le32_to_cpu(ip->base_address[k]); + if (ihdr->base_addr_64_bit) + /* Truncate the 64bit base address from ip discovery + * and only store lower 32bit ip base in reg_offset[]. + * Bits > 32 follows ASIC specific format, thus just + * discard them and handle it within specific ASIC. + * By this way reg_offset[] and related helpers can + * stay unchanged. + * The base address is in dwords, thus clear the + * highest 2 bits to store. + */ + ip->base_address[k] = + lower_32_bits(le64_to_cpu(ip->base_address_64[k])) & 0x3FFFFFFF; + else + ip->base_address[k] = le32_to_cpu(ip->base_address[k]); DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); } for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { - if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { + if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id) && + hw_id_map[hw_ip] != 0) { DRM_DEBUG("set register base offset for %s\n", hw_id_names[le16_to_cpu(ip->hw_id)]); - adev->reg_offset[hw_ip][ip->number_instance] = + adev->reg_offset[hw_ip][ip->instance_number] = ip->base_address; /* Instance support is somewhat inconsistent. * SDMA is a good example. Sienna cichlid has 4 total @@ -1193,69 +1329,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) * example. On most chips there are multiple instances * with the same HWID. 
*/ - adev->ip_versions[hw_ip][ip->number_instance] = + adev->ip_versions[hw_ip][ip->instance_number] = IP_VERSION(ip->major, ip->minor, ip->revision); } } next_ip: - ip_offset += struct_size(ip, base_address, ip->num_base_address); + if (ihdr->base_addr_64_bit) + ip_offset += struct_size(ip, base_address_64, ip->num_base_address); + else + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } - amdgpu_discovery_sysfs_init(adev); - return 0; } -int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, - int *major, int *minor, int *revision) -{ - struct binary_header *bhdr; - struct ip_discovery_header *ihdr; - struct die_header *dhdr; - struct ip *ip; - uint16_t die_offset; - uint16_t ip_offset; - uint16_t num_dies; - uint16_t num_ips; - int i, j; - - if (!adev->mman.discovery_bin) { - DRM_ERROR("ip discovery uninitialized\n"); - return -EINVAL; - } - - bhdr = (struct binary_header *)adev->mman.discovery_bin; - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); - num_dies = le16_to_cpu(ihdr->num_dies); - - for (i = 0; i < num_dies; i++) { - die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); - dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); - num_ips = le16_to_cpu(dhdr->num_ips); - ip_offset = die_offset + sizeof(*dhdr); - - for (j = 0; j < num_ips; j++) { - ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); - - if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) { - if (major) - *major = ip->major; - if (minor) - *minor = ip->minor; - if (revision) - *revision = ip->revision; - return 0; - } - ip_offset += struct_size(ip, base_address, ip->num_base_address); - } - } - - return -EINVAL; -} - static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { int vcn_harvest_count = 0; @@ -1266,7 +1355,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) * so read harvest bit per IP data structure to set * harvest configuration. 
*/ - if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0)) { + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) && + adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) { if ((adev->pdev->device == 0x731E && (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || @@ -1425,6 +1515,7 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) mall_size += mall_size_per_umc; } adev->gmc.mall_size = mall_size; + adev->gmc.m_half_use = half_use; break; default: dev_err(adev->dev, @@ -1706,6 +1797,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): @@ -1804,6 +1896,11 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); break; + case IP_VERSION(9, 4, 3): + if (!amdgpu_exp_hw_support) + return -EINVAL; + amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); + break; case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 1): @@ -1939,7 +2036,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 0, 2): - case IP_VERSION(3, 0, 192): amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); @@ -1952,7 +2048,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 0, 4): amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); - return 0; + break; + case IP_VERSION(4, 0, 3): + amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2000,6 +2100,17 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) return 0; } +static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + aqua_vanjaram_init_soc_config(adev); + break; + default: + break; + } +} + int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -2177,6 +2288,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; } + amdgpu_discovery_init_soc_config(adev); + amdgpu_discovery_sysfs_init(adev); + switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): @@ -2387,6 +2501,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): adev->smuio.funcs = &smuio_v13_0_funcs; break; + case IP_VERSION(13, 0, 3): + adev->smuio.funcs = &smuio_v13_0_3_funcs; + if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) { + adev->flags |= AMD_IS_APU; + } + break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 8): adev->smuio.funcs = &smuio_v13_0_6_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 8563dd4a7dc2..3a2f347bd50d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,12 +24,10 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (4 << 10) +#define DISCOVERY_TMR_SIZE (8 
<< 10) #define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); -int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, - int *major, int *minor, int *revision); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d60fe7eb5579..b702f499f5fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -98,7 +98,7 @@ static void amdgpu_display_flip_callback(struct dma_fence *f, static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work, struct dma_fence **f) { - struct dma_fence *fence= *f; + struct dma_fence *fence = *f; if (fence == NULL) return false; @@ -1252,21 +1252,21 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, }; -static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = -{ { UNDERSCAN_OFF, "off" }, +static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = { + { UNDERSCAN_OFF, "off" }, { UNDERSCAN_ON, "on" }, { UNDERSCAN_AUTO, "auto" }, }; -static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = -{ { AMDGPU_AUDIO_DISABLE, "off" }, +static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = { + { AMDGPU_AUDIO_DISABLE, "off" }, { AMDGPU_AUDIO_ENABLE, "on" }, { AMDGPU_AUDIO_AUTO, "auto" }, }; /* XXX support different dither options? spatial, temporal, both, etc. */ -static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = -{ { AMDGPU_FMT_DITHER_DISABLE, "off" }, +static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { + { AMDGPU_FMT_DITHER_DISABLE, "off" }, { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; @@ -1496,8 +1496,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, ret |= DRM_SCANOUTPOS_ACCURATE; vbl_start = vbl & 0x1fff; vbl_end = (vbl >> 16) & 0x1fff; - } - else { + } else { /* No: Fake something reasonable which gives at least ok results. 
*/ vbl_start = mode->crtc_vdisplay; vbl_end = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 0c001bb8fc2b..12210598e5b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -149,7 +149,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, if (!bo->tbo.pin_count) { /* move buffer into GTT or VRAM */ struct ttm_operation_ctx ctx = { false, false }; - unsigned domains = AMDGPU_GEM_DOMAIN_GTT; + unsigned int domains = AMDGPU_GEM_DOMAIN_GTT; if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && attach->peer2peer) { @@ -336,7 +336,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU, flags, - ttm_bo_type_sg, resv, &gobj); + ttm_bo_type_sg, resv, &gobj, 0); if (ret) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 8fd11497faba..f637574644c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -59,7 +59,7 @@ struct amdgpu_doorbell_index { uint32_t gfx_ring1; uint32_t gfx_userqueue_start; uint32_t gfx_userqueue_end; - uint32_t sdma_engine[8]; + uint32_t sdma_engine[16]; uint32_t mes_ring0; uint32_t mes_ring1; uint32_t ih; @@ -86,6 +86,8 @@ struct amdgpu_doorbell_index { uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; + /* Per xcc doorbell size for KIQ/KCQ */ + uint32_t xcc_doorbell_range; }; typedef enum _AMDGPU_DOORBELL_ASSIGNMENT @@ -164,7 +166,15 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0, AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7, - AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, + /* kiq/kcq from second XCD. Max 8 XCDs */ + AMDGPU_VEGA20_DOORBELL_XCC1_KIQ_START = 0x190, + /* 8 compute rings per GC. Max to 0x1CE */ + AMDGPU_VEGA20_DOORBELL_XCC1_MEC_RING0_START = 0x197, + + /* AID1 SDMA: 0x1D0 ~ 0x1F7 */ + AMDGPU_VEGA20_DOORBELL_AID1_sDMA_START = 0x1D0, + + AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7, AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; @@ -301,6 +311,36 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { + /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... 
*/ + + /* KIQ/HIQ/DIQ */ + AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000, + AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001, + AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002, + /* Compute: 0x08 ~ 0x20 */ + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008, + AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010, + AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F, + AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020, + + /* SDMA: 0x100 ~ 0x19F */ + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100, + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, + /* IH: 0x1A0 ~ 0x1AF */ + AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, + /* VCN: 0x1B0 ~ 0x1D4 */ + AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + + AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, + AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, + + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF +} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; + u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 393b6fb7a71d..3b711babd4e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -50,6 +50,7 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" #include "amdgpu_reset.h" +#include "../amdxcp/amdgpu_xcp_drv.h" /* * KMS wrapper. @@ -110,9 +111,11 @@ * 3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info fields: * tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, sqc_inst_cache_size, * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi + * 3.53.0 - Support for GFX11 CP GFX shadowing + * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 52 +#define KMS_DRIVER_MINOR 54 #define KMS_DRIVER_PATCHLEVEL 0 unsigned int amdgpu_vram_limit = UINT_MAX; @@ -150,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; - +bool enforce_isolation; /* * OverDrive(bit 14) disabled by default * GFX DCS(bit 19) disabled by default @@ -191,6 +194,7 @@ int amdgpu_smartshift_bias; int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; int amdgpu_sg_display = -1; /* auto */ +int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -820,6 +824,13 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm #endif /** + * DOC: mtype_local (int) + */ +int amdgpu_mtype_local; +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); +module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); + +/** * DOC: pcie_p2p (bool) * Enable PCIe P2P (requires large-BAR). Default value: true (on) */ @@ -948,6 +959,28 @@ MODULE_PARM_DESC(smu_pptable_id, "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); +/** + * DOC: partition_mode (int) + * Used to override the default SPX mode. 
+ */ +MODULE_PARM_DESC( + user_partt_mode, + "specify partition mode to be used (-2 = AMDGPU_AUTO_COMPUTE_PARTITION_MODE(default value) \ + 0 = AMDGPU_SPX_PARTITION_MODE, \ + 1 = AMDGPU_DPX_PARTITION_MODE, \ + 2 = AMDGPU_TPX_PARTITION_MODE, \ + 3 = AMDGPU_QPX_PARTITION_MODE, \ + 4 = AMDGPU_CPX_PARTITION_MODE)"); +module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); + + +/** + * DOC: enforce_isolation (bool) + * enforce process isolation between graphics and compute via using the same reserved vmid. + */ +module_param(enforce_isolation, bool, 0444); +MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -1661,7 +1694,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { }; static const struct pci_device_id pciidlist[] = { -#ifdef CONFIG_DRM_AMDGPU_SI +#ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, {0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -2018,6 +2051,11 @@ static const struct pci_device_id pciidlist[] = { .class_mask = 0xffffff, .driver_data = CHIP_IP_DISCOVERY }, + { PCI_DEVICE(0x1002, PCI_ANY_ID), + .class = PCI_CLASS_ACCELERATOR_PROCESSING << 8, + .class_mask = 0xffffff, + .driver_data = CHIP_IP_DISCOVERY }, + {0, 0, 0} }; @@ -2162,6 +2200,10 @@ retry_init: goto err_pci; } + ret = amdgpu_xcp_dev_register(adev, ent); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors @@ -2234,6 +2276,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_xcp_dev_unplug(adev); drm_dev_unplug(dev); if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { @@ -2748,7 +2791,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .compat_ioctl = amdgpu_kms_compat_ioctl, #endif #ifdef CONFIG_PROC_FS - .show_fdinfo = amdgpu_show_fdinfo + .show_fdinfo = drm_show_fdinfo, #endif }; @@ -2803,6 +2846,36 @@ static const struct drm_driver amdgpu_kms_driver = { .dumb_map_offset = amdgpu_mode_dumb_mmap, .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, +#ifdef CONFIG_PROC_FS + .show_fdinfo = amdgpu_show_fdinfo, +#endif + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = amdgpu_gem_prime_import, + .gem_prime_mmap = drm_gem_prime_mmap, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = KMS_DRIVER_MAJOR, + .minor = KMS_DRIVER_MINOR, + .patchlevel = KMS_DRIVER_PATCHLEVEL, +}; + +const struct drm_driver amdgpu_partition_driver = { + .driver_features = + DRIVER_GEM | DRIVER_RENDER | DRIVER_SYNCOBJ | + DRIVER_SYNCOBJ_TIMELINE, + .open = amdgpu_driver_open_kms, + .postclose = amdgpu_driver_postclose_kms, + .lastclose = amdgpu_driver_lastclose_kms, + .ioctls = amdgpu_ioctls_kms, + .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), + .dumb_create = amdgpu_mode_dumb_create, + .dumb_map_offset = amdgpu_mode_dumb_mmap, + .fops = &amdgpu_driver_kms_fops, + .release = &amdgpu_driver_release_kms, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, @@ -2884,9 +2957,11 @@ static void __exit amdgpu_exit(void) amdgpu_amdkfd_fini(); pci_unregister_driver(&amdgpu_kms_pci_driver); 
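With the fdinfo hooks above, the common keys are emitted by DRM core and the driver only contributes its own counters through a drm_printer; the core behaviour is summarized from drm_file and not reproduced from this patch:

        /*
         * /proc/<pid>/fdinfo/<fd>
         *   -> amdgpu_driver_kms_fops.show_fdinfo == drm_show_fdinfo (DRM core,
         *      prints the shared drm-* keys)
         *        -> drm_driver.show_fdinfo == amdgpu_show_fdinfo(struct drm_printer *p,
         *                                                        struct drm_file *file)
         *           -> drm_printf(p, ...) for the amd-* and per-engine keys
         */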
amdgpu_unregister_atpx_handler(); + amdgpu_acpi_release(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); mmu_notifier_synchronize(); + amdgpu_xcp_drv_release(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index 8178323e4bef..5bc2cb661af7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -42,6 +42,8 @@ #define DRIVER_DESC "AMD GPU" #define DRIVER_DATE "20150101" +extern const struct drm_driver amdgpu_partition_driver; + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 27a782a9dc72..3aaeed2d3562 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -70,6 +70,7 @@ void amdgpu_encoder_set_active_device(struct drm_encoder *encoder) drm_for_each_connector_iter(connector, &iter) { if (connector->encoder == encoder) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + amdgpu_encoder->active_device = amdgpu_encoder->devices & amdgpu_connector->devices; DRM_DEBUG_KMS("setting active device to %08x from %08x %08x for encoder %d\n", amdgpu_encoder->active_device, amdgpu_encoder->devices, @@ -165,12 +166,12 @@ void amdgpu_panel_mode_fixup(struct drm_encoder *encoder, { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; - unsigned hblank = native_mode->htotal - native_mode->hdisplay; - unsigned vblank = native_mode->vtotal - native_mode->vdisplay; - unsigned hover = native_mode->hsync_start - native_mode->hdisplay; - unsigned vover = native_mode->vsync_start - native_mode->vdisplay; - unsigned hsync_width = native_mode->hsync_end - native_mode->hsync_start; - unsigned vsync_width = native_mode->vsync_end - native_mode->vsync_start; + unsigned int hblank = native_mode->htotal - native_mode->hdisplay; + unsigned int vblank = native_mode->vtotal - native_mode->vdisplay; + unsigned int hover = native_mode->hsync_start - native_mode->hdisplay; + unsigned int vover = native_mode->vsync_start - native_mode->vdisplay; + unsigned int hsync_width = native_mode->hsync_end - native_mode->hsync_start; + unsigned int vsync_width = native_mode->vsync_end - native_mode->vsync_start; adjusted_mode->clock = native_mode->clock; adjusted_mode->flags = native_mode->flags; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index c57252f004e8..13d7413d4ca3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -53,9 +53,8 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_JPEG] = "jpeg", }; -void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) +void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) { - struct drm_file *file = f->private_data; struct amdgpu_device *adev = drm_to_adev(file->minor->dev); struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; @@ -87,31 +86,30 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f) * ****************************************************************** */ - seq_printf(m, "pasid:\t%u\n", fpriv->vm.pasid); - seq_printf(m, "drm-driver:\t%s\n", file->minor->dev->driver->name); - seq_printf(m, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); - seq_printf(m, 
"drm-client-id:\t%Lu\n", vm->immediate.fence_context); - seq_printf(m, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL); - seq_printf(m, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL); - seq_printf(m, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL); - seq_printf(m, "amd-memory-visible-vram:\t%llu KiB\n", + drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid); + drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name); + drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); + drm_printf(p, "drm-client-id:\t%Lu\n", vm->immediate.fence_context); + drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL); + drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL); + drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL); + drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", stats.visible_vram/1024UL); - seq_printf(m, "amd-evicted-vram:\t%llu KiB\n", + drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", stats.evicted_vram/1024UL); - seq_printf(m, "amd-evicted-visible-vram:\t%llu KiB\n", + drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n", stats.evicted_visible_vram/1024UL); - seq_printf(m, "amd-requested-vram:\t%llu KiB\n", + drm_printf(p, "amd-requested-vram:\t%llu KiB\n", stats.requested_vram/1024UL); - seq_printf(m, "amd-requested-visible-vram:\t%llu KiB\n", + drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n", stats.requested_visible_vram/1024UL); - seq_printf(m, "amd-requested-gtt:\t%llu KiB\n", + drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats.requested_gtt/1024UL); - for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { if (!usage[hw_ip]) continue; - seq_printf(m, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip], + drm_printf(p, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip], ktime_to_ns(usage[hw_ip])); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h index e86834bfea1d..0398f5a159ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.h @@ -37,6 +37,6 @@ #include "amdgpu_ids.h" uint32_t amdgpu_get_ip_count(struct amdgpu_device *adev, int id); -void amdgpu_show_fdinfo(struct seq_file *m, struct file *f); +void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index a7d250809da9..c694b41f6461 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -42,7 +42,6 @@ #include "amdgpu_reset.h" /* - * Fences * Fences mark an event in the GPUs pipeline and are used * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence @@ -140,7 +139,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Returns 0 on success, -ENOMEM on failure. 
*/ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amdgpu_job *job, - unsigned flags) + unsigned int flags) { struct amdgpu_device *adev = ring->adev; struct dma_fence *fence; @@ -174,11 +173,11 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd adev->fence_context + ring->idx, seq); /* Against remove in amdgpu_job_{free, free_cb} */ dma_fence_get(fence); - } - else + } else { dma_fence_init(fence, &amdgpu_fence_ops, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); + } } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -377,14 +376,11 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout) { - uint32_t seq; - - do { - seq = amdgpu_fence_read(ring); - udelay(5); - timeout -= 5; - } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0); + while ((int32_t)(wait_seq - amdgpu_fence_read(ring)) > 0 && timeout > 0) { + udelay(2); + timeout -= 2; + } return timeout > 0 ? timeout : 0; } /** @@ -396,7 +392,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, * Returns the number of emitted fences on the ring. Used by the * dynpm code to ring track activity. */ -unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) +unsigned int amdgpu_fence_count_emitted(struct amdgpu_ring *ring) { uint64_t emitted; @@ -475,7 +471,7 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, */ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq_src, - unsigned irq_type) + unsigned int irq_type) { struct amdgpu_device *adev = ring->adev; uint64_t index; @@ -654,6 +650,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; + if (!ring || !ring->fence_drv.initialized) continue; @@ -695,6 +692,30 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) } /** + * amdgpu_fence_driver_set_error - set error code on fences + * @ring: the ring which contains the fences + * @error: the error code to set + * + * Set an error code to all the fences pending on the ring. 
+ */ +void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + unsigned long flags; + + spin_lock_irqsave(&drv->lock, flags); + for (unsigned int i = 0; i <= drv->num_fences_mask; ++i) { + struct dma_fence *fence; + + fence = rcu_dereference_protected(drv->fences[i], + lockdep_is_held(&drv->lock)); + if (fence && !dma_fence_is_signaled_locked(fence)) + dma_fence_set_error(fence, error); + } + spin_unlock_irqrestore(&drv->lock, flags); +} + +/** * amdgpu_fence_driver_force_completion - force signal latest fence of ring * * @ring: fence of the ring to signal @@ -702,6 +723,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) */ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) { + amdgpu_fence_driver_set_error(ring, -ECANCELED); amdgpu_fence_write(ring, ring->fence_drv.sync_seq); amdgpu_fence_process(ring); } @@ -836,11 +858,12 @@ static const struct dma_fence_ops amdgpu_job_fence_ops = { #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_fence_info_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; int i; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; + if (!ring || !ring->fence_drv.initialized) continue; @@ -914,6 +937,7 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work) reset_work); struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); reset_context.method = AMD_RESET_METHOD_NONE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 01cb89ffbd56..73b8cca35bab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -35,6 +35,7 @@ #endif #include "amdgpu.h" #include <drm/drm_drv.h> +#include <drm/ttm/ttm_tt.h> /* * GART @@ -103,6 +104,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) } /** + * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table + * + * @adev: amdgpu_device pointer + * + * Allocate system memory for GART page table for ASICs that don't have + * dedicated VRAM. + * Returns 0 for success, error for failure. + */ +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) +{ + unsigned int order = get_order(adev->gart.table_size); + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO; + struct amdgpu_bo *bo = NULL; + struct sg_table *sg = NULL; + struct amdgpu_bo_param bp; + dma_addr_t dma_addr; + struct page *p; + int ret; + + if (adev->gart.bo != NULL) + return 0; + + p = alloc_pages(gfp_flags, order); + if (!p) + return -ENOMEM; + + /* If the hardware does not support UTCL2 snooping of the CPU caches + * then set_memory_wc() could be used as a workaround to mark the pages + * as write combine memory. 
+ */ + dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&adev->pdev->dev, dma_addr)) { + dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n"); + __free_pages(p, order); + p = NULL; + return -EFAULT; + } + + dev_info(adev->dev, "%s dma_addr:%pad\n", __func__, &dma_addr); + /* Create SG table */ + sg = kmalloc(sizeof(*sg), GFP_KERNEL); + if (!sg) { + ret = -ENOMEM; + goto error; + } + ret = sg_alloc_table(sg, 1, GFP_KERNEL); + if (ret) + goto error; + + sg_dma_address(sg->sgl) = dma_addr; + sg->sgl->length = adev->gart.table_size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = adev->gart.table_size; +#endif + /* Create SG BO */ + memset(&bp, 0, sizeof(bp)); + bp.size = adev->gart.table_size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.type = ttm_bo_type_sg; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + bp.flags = 0; + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret); + goto error; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + WARN(ret, "Pinning the GART table failed"); + if (ret) + goto error_resv; + + adev->gart.bo = bo; + adev->gart.ptr = page_to_virt(p); + /* Make GART table accessible in VMID0 */ + ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo); + if (ret) + amdgpu_gart_table_ram_free(adev); + amdgpu_bo_unreserve(bo); + + return 0; + +error_resv: + amdgpu_bo_unreserve(bo); +error: + amdgpu_bo_unref(&bo); + if (sg) { + sg_free_table(sg); + kfree(sg); + } + __free_pages(p, order); + return ret; +} + +/** + * amdgpu_gart_table_ram_free - free gart page table system ram + * + * @adev: amdgpu_device pointer + * + * Free the system memory used for the GART page tableon ASICs that don't + * have dedicated VRAM. 
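The allocation path above wraps an already DMA-mapped page in a one-entry scatter/gather table so the system-RAM GART table can be handed to TTM as an sg BO. The same pattern in isolation (hypothetical helper, error handling trimmed; the caller keeps ownership of the DMA mapping):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static struct sg_table *sg_table_from_dma_addr(dma_addr_t dma_addr,
                                               unsigned int size)
{
        struct sg_table *sgt;

        sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
        if (!sgt)
                return NULL;
        if (sg_alloc_table(sgt, 1, GFP_KERNEL)) {
                kfree(sgt);
                return NULL;
        }
        /* single entry describing the pre-mapped region */
        sg_dma_address(sgt->sgl) = dma_addr;
        sgt->sgl->length = size;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
        sgt->sgl->dma_length = size;
#endif
        return sgt;
}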
+ */ +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) +{ + unsigned int order = get_order(adev->gart.table_size); + struct sg_table *sg = adev->gart.bo->tbo.sg; + struct page *p; + int ret; + + ret = amdgpu_bo_reserve(adev->gart.bo, false); + if (!ret) { + amdgpu_bo_unpin(adev->gart.bo); + amdgpu_bo_unreserve(adev->gart.bo); + } + amdgpu_bo_unref(&adev->gart.bo); + sg_free_table(sg); + kfree(sg); + p = virt_to_page(adev->gart.ptr); + __free_pages(p, order); + + adev->gart.ptr = NULL; +} + +/** * amdgpu_gart_table_vram_alloc - allocate vram for gart page table * * @adev: amdgpu_device pointer @@ -182,7 +319,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_device_flush_hdp(adev, NULL); - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); drm_dev_exit(idx); @@ -264,7 +401,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev) mb(); amdgpu_device_flush_hdp(adev, NULL); - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8fea3e04e411..8283d682f543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -51,6 +51,8 @@ struct amdgpu_gart { uint64_t gart_pte_flags; }; +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 863cb668e000..74055cba3dc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -98,7 +98,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct dma_resv *resv, - struct drm_gem_object **obj) + struct drm_gem_object **obj, int8_t xcp_id_plus1) { struct amdgpu_bo *bo; struct amdgpu_bo_user *ubo; @@ -116,6 +116,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bp.flags = flags; bp.domain = initial_domain; bp.bo_ptr_size = sizeof(struct amdgpu_bo); + bp.xcp_id_plus1 = xcp_id_plus1; r = amdgpu_bo_create_user(adev, &bp, &ubo); if (r) @@ -336,7 +337,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, retry: r = amdgpu_gem_object_create(adev, size, args->in.alignment, initial_domain, - flags, ttm_bo_type_device, resv, &gobj); + flags, ttm_bo_type_device, resv, &gobj, fpriv->xcp_id + 1); if (r && r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; @@ -379,6 +380,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_userptr *args = data; + struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_gem_object *gobj; struct hmm_range *range; struct amdgpu_bo *bo; @@ -405,7 +407,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = amdgpu_gem_object_create(adev, args->size, 0, 
AMDGPU_GEM_DOMAIN_CPU, - 0, ttm_bo_type_device, NULL, &gobj); + 0, ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1); if (r) return r; @@ -908,6 +910,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args) { struct amdgpu_device *adev = drm_to_adev(dev); + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; struct drm_gem_object *gobj; uint32_t handle; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | @@ -931,7 +934,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, domain = amdgpu_bo_get_preferred_domain(adev, amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, - ttm_bo_type_device, NULL, &gobj); + ttm_bo_type_device, NULL, &gobj, fpriv->xcp_id + 1); if (r) return -ENOMEM; @@ -948,7 +951,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; struct drm_device *dev = adev_to_drm(adev); struct drm_file *file; int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index 637bf51dbf06..f30264782ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -43,8 +43,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct dma_resv *resv, - struct drm_gem_object **obj); - + struct drm_gem_object **obj, int8_t xcp_id_plus1); int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f3f541ba0aca..a33d4bc34cee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -28,6 +28,7 @@ #include "amdgpu_gfx.h" #include "amdgpu_rlc.h" #include "amdgpu_ras.h" +#include "amdgpu_xcp.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -63,10 +64,10 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, } bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, - int mec, int pipe, int queue) + int xcc_id, int mec, int pipe, int queue) { return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), - adev->gfx.mec.queue_bitmap); + adev->gfx.mec_bitmap[xcc_id].queue_bitmap); } int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, @@ -204,29 +205,38 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { - int i, queue, pipe; + int i, j, queue, pipe; bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev); int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); + int num_xcc = adev->gfx.xcc_mask ? 
NUM_XCC(adev->gfx.xcc_mask) : 1; if (multipipe_policy) { - /* policy: make queues evenly cross all pipes on MEC1 only */ - for (i = 0; i < max_queues_per_mec; i++) { - pipe = i % adev->gfx.mec.num_pipe_per_mec; - queue = (i / adev->gfx.mec.num_pipe_per_mec) % - adev->gfx.mec.num_queue_per_pipe; - - set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, - adev->gfx.mec.queue_bitmap); + /* policy: make queues evenly cross all pipes on MEC1 only + * for multiple xcc, just use the original policy for simplicity */ + for (j = 0; j < num_xcc; j++) { + for (i = 0; i < max_queues_per_mec; i++) { + pipe = i % adev->gfx.mec.num_pipe_per_mec; + queue = (i / adev->gfx.mec.num_pipe_per_mec) % + adev->gfx.mec.num_queue_per_pipe; + + set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue, + adev->gfx.mec_bitmap[j].queue_bitmap); + } } } else { /* policy: amdgpu owns all queues in the given pipe */ - for (i = 0; i < max_queues_per_mec; ++i) - set_bit(i, adev->gfx.mec.queue_bitmap); + for (j = 0; j < num_xcc; j++) { + for (i = 0; i < max_queues_per_mec; ++i) + set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap); + } } - dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); + for (j = 0; j < num_xcc; j++) { + dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", + bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); + } } void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) @@ -258,7 +268,7 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) } static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, - struct amdgpu_ring *ring) + struct amdgpu_ring *ring, int xcc_id) { int queue_bit; int mec, pipe, queue; @@ -268,7 +278,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_queue_per_pipe; while (--queue_bit >= 0) { - if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap)) continue; amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); @@ -294,9 +304,9 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) + struct amdgpu_irq_src *irq, int xcc_id) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; int r = 0; spin_lock_init(&kiq->ring_lock); @@ -304,16 +314,20 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.kiq; - ring->vm_hub = AMDGPU_GFXHUB_0; - - r = amdgpu_gfx_kiq_acquire(adev, ring); + ring->xcc_id = xcc_id; + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + ring->doorbell_index = + (adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) + << 1; + + r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id); if (r) return r; ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->no_scheduler = true; - sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) @@ -327,19 +341,19 @@ void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) amdgpu_ring_fini(ring); } -void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int 
xcc_id) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); } int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, - unsigned hpd_size) + unsigned hpd_size, int xcc_id) { int r; u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, @@ -362,13 +376,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, /* create MQD for each compute/gfx queue */ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size) + unsigned mqd_size, int xcc_id) { - struct amdgpu_ring *ring = NULL; - int r, i; + int r, i, j; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *ring = &kiq->ring; + u32 domain = AMDGPU_GEM_DOMAIN_GTT; + + /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */ + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) + domain |= AMDGPU_GEM_DOMAIN_VRAM; /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; if (!adev->enable_mes_kiq && !ring->mqd_obj) { /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD @@ -387,8 +406,8 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, } /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL); + if (!kiq->mqd_backup) dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } @@ -398,13 +417,14 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring = &adev->gfx.gfx_ring[i]; if (!ring->mqd_obj) { r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + domain, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); return r; } + ring->mqd_size = mqd_size; /* prepare MQD backup */ adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); if (!adev->gfx.me.mqd_backup[i]) @@ -415,19 +435,21 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, /* create MQD for each KCQ */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; + j = i + xcc_id * adev->gfx.num_compute_rings; + ring = &adev->gfx.compute_ring[j]; if (!ring->mqd_obj) { r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + domain, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); return r; } + ring->mqd_size = mqd_size; /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[i]) + adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[j]) dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } } @@ -435,10 +457,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, return 0; } -void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_ring *ring = NULL; - int i; + int i, j; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { for (i = 
0; i < adev->gfx.num_gfx_rings; i++) { @@ -451,43 +474,81 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - kfree(adev->gfx.mec.mqd_backup[i]); + j = i + xcc_id * adev->gfx.num_compute_rings; + ring = &adev->gfx.compute_ring[j]; + kfree(adev->gfx.mec.mqd_backup[j]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); } - ring = &adev->gfx.kiq.ring; - kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); + ring = &kiq->ring; + kfree(kiq->mqd_backup); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); } -int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; int i, r = 0; + int j; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&kiq->ring_lock); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * adev->gfx.num_compute_rings)) { - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&kiq->ring_lock); return -ENOMEM; } - for (i = 0; i < adev->gfx.num_compute_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_unmap_queues(kiq_ring, + &adev->gfx.compute_ring[j], RESET_QUEUES, 0, 0); + } - if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&kiq->ring_lock); + + return r; +} + +int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i, r = 0; + int j; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + spin_lock(&kiq->ring_lock); + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_gfx_rings)) { + spin_unlock(&kiq->ring_lock); + return -ENOMEM; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + kiq->pmf->kiq_unmap_queues(kiq_ring, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + + if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang) + r = amdgpu_ring_test_helper(kiq_ring); + spin_unlock(&kiq->ring_lock); return r; } @@ -505,18 +566,18 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, return set_resource_bit; } -int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; uint64_t queue_mask = 0; - int r, i; + int r, i, j; if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) return -EINVAL; for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap)) continue; /* This situation may be hit in the future if a new HW @@ -532,13 +593,15 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) DRM_INFO("kiq ring mec 
%d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, kiq_ring->queue); - spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_device_flush_hdp(adev, NULL); + + spin_lock(&kiq->ring_lock); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * adev->gfx.num_compute_rings + kiq->pmf->set_resources_size); if (r) { DRM_ERROR("Failed to lock KIQ (%d).\n", r); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&kiq->ring_lock); return r; } @@ -546,11 +609,51 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) queue_mask = ~0ULL; kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); - for (i = 0; i < adev->gfx.num_compute_rings; i++) - kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.compute_ring[j]); + } r = amdgpu_ring_test_helper(kiq_ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&kiq->ring_lock); + if (r) + DRM_ERROR("KCQ enable failed\n"); + + return r; +} + +int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int r, i, j; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues) + return -EINVAL; + + amdgpu_device_flush_hdp(adev, NULL); + + spin_lock(&kiq->ring_lock); + /* No need to map kcq on the slave */ + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_gfx_rings); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + spin_unlock(&kiq->ring_lock); + return r; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.gfx_ring[j]); + } + } + + r = amdgpu_ring_test_helper(kiq_ring); + spin_unlock(&kiq->ring_lock); if (r) DRM_ERROR("KCQ enable failed\n"); @@ -785,12 +888,31 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, return 0; } +void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, + void *ras_error_status, + void (*func)(struct amdgpu_device *adev, void *ras_error_status, + int xcc_id)) +{ + int i; + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; + uint32_t xcc_mask = GENMASK(num_xcc - 1, 0); + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + if (err_data) { + err_data->ue_count = 0; + err_data->ce_count = 0; + } + + for_each_inst(i, xcc_mask) + func(adev, ras_error_status, i); +} + uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -858,7 +980,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); @@ -1062,3 +1184,125 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); } } + +bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id) +{ + return !(xcc_id % (adev->gfx.num_xcc_per_xcp ? 
+ adev->gfx.num_xcc_per_xcp : 1)); +} + +static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int mode; + + mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE); + + return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode)); +} + +static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, + struct device_attribute *addr, + const char *buf, size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_gfx_partition mode; + int ret = 0, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (num_xcc % 2 != 0) + return -EINVAL; + + if (!strncasecmp("SPX", buf, strlen("SPX"))) { + mode = AMDGPU_SPX_PARTITION_MODE; + } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { + /* + * DPX mode needs AIDs to be in multiple of 2. + * Each AID connects 2 XCCs. + */ + if (num_xcc%4) + return -EINVAL; + mode = AMDGPU_DPX_PARTITION_MODE; + } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { + if (num_xcc != 6) + return -EINVAL; + mode = AMDGPU_TPX_PARTITION_MODE; + } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { + if (num_xcc != 8) + return -EINVAL; + mode = AMDGPU_QPX_PARTITION_MODE; + } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { + mode = AMDGPU_CPX_PARTITION_MODE; + } else { + return -EINVAL; + } + + ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); + + if (ret) + return ret; + + return count; +} + +static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, + struct device_attribute *addr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + char *supported_partition; + + /* TBD */ + switch (NUM_XCC(adev->gfx.xcc_mask)) { + case 8: + supported_partition = "SPX, DPX, QPX, CPX"; + break; + case 6: + supported_partition = "SPX, TPX, CPX"; + break; + case 4: + supported_partition = "SPX, DPX, CPX"; + break; + /* this seems only existing in emulation phase */ + case 2: + supported_partition = "SPX, CPX"; + break; + default: + supported_partition = "Not supported"; + break; + } + + return sysfs_emit(buf, "%s\n", supported_partition); +} + +static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, + amdgpu_gfx_get_current_compute_partition, + amdgpu_gfx_set_compute_partition); + +static DEVICE_ATTR(available_compute_partition, S_IRUGO, + amdgpu_gfx_get_available_compute_partition, NULL); + +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +{ + int r; + + r = device_create_file(adev->dev, &dev_attr_current_compute_partition); + if (r) + return r; + + r = device_create_file(adev->dev, &dev_attr_available_compute_partition); + + return r; +} + +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_current_compute_partition); + device_remove_file(adev->dev, &dev_attr_available_compute_partition); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index bfabea76d166..ce0f7a8ad4b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -61,7 +61,42 @@ enum amdgpu_gfx_partition { AMDGPU_TPX_PARTITION_MODE = 2, AMDGPU_QPX_PARTITION_MODE = 3, AMDGPU_CPX_PARTITION_MODE = 4, - AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE = -1, + /* Automatically 
choose the right mode */ + AMDGPU_AUTO_COMPUTE_PARTITION_MODE = -2, +}; + +#define NUM_XCC(x) hweight16(x) + +enum amdgpu_pkg_type { + AMDGPU_PKG_TYPE_APU = 2, + AMDGPU_PKG_TYPE_UNKNOWN, +}; + +enum amdgpu_gfx_ras_mem_id_type { + AMDGPU_GFX_CP_MEM = 0, + AMDGPU_GFX_GCEA_MEM, + AMDGPU_GFX_GC_CANE_MEM, + AMDGPU_GFX_GCUTCL2_MEM, + AMDGPU_GFX_GDS_MEM, + AMDGPU_GFX_LDS_MEM, + AMDGPU_GFX_RLC_MEM, + AMDGPU_GFX_SP_MEM, + AMDGPU_GFX_SPI_MEM, + AMDGPU_GFX_SQC_MEM, + AMDGPU_GFX_SQ_MEM, + AMDGPU_GFX_TA_MEM, + AMDGPU_GFX_TCC_MEM, + AMDGPU_GFX_TCA_MEM, + AMDGPU_GFX_TCI_MEM, + AMDGPU_GFX_TCP_MEM, + AMDGPU_GFX_TD_MEM, + AMDGPU_GFX_TCX_MEM, + AMDGPU_GFX_ATC_L2_MEM, + AMDGPU_GFX_UTCL2_MEM, + AMDGPU_GFX_VML2_MEM, + AMDGPU_GFX_VML2_WALKER_MEM, + AMDGPU_GFX_MEM_TYPE_NUM }; struct amdgpu_mec { @@ -75,8 +110,10 @@ struct amdgpu_mec { u32 num_mec; u32 num_pipe_per_mec; u32 num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES]; +}; +struct amdgpu_mec_bitmap { /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; @@ -120,6 +157,7 @@ struct amdgpu_kiq { struct amdgpu_ring ring; struct amdgpu_irq_src irq; const struct kiq_pm4_funcs *pmf; + void *mqd_backup; }; /* @@ -230,23 +268,37 @@ struct amdgpu_gfx_ras { struct amdgpu_iv_entry *entry); }; +struct amdgpu_gfx_shadow_info { + u32 shadow_size; + u32 shadow_alignment; + u32 csa_size; + u32 csa_alignment; +}; + struct amdgpu_gfx_funcs { /* get the gpu clock counter */ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); - void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, + u32 sh_num, u32 instance, int xcc_id); + void (*read_wave_data)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); - void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst); - void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, + void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, - u32 queue, u32 vmid); + u32 queue, u32 vmid, u32 xcc_id); void (*init_spm_golden)(struct amdgpu_device *adev); void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); + int (*get_gfx_shadow_info)(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info); + enum amdgpu_gfx_partition + (*query_partition_mode)(struct amdgpu_device *adev); + int (*switch_partition_mode)(struct amdgpu_device *adev, + int num_xccs_per_xcp); + int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); }; struct sq_work { @@ -296,7 +348,8 @@ struct amdgpu_gfx { struct amdgpu_ce ce; struct amdgpu_me me; struct amdgpu_mec mec; - struct amdgpu_kiq kiq; + struct amdgpu_mec_bitmap mec_bitmap[AMDGPU_MAX_GC_INSTANCES]; + struct amdgpu_kiq kiq[AMDGPU_MAX_GC_INSTANCES]; struct amdgpu_imu imu; bool rs64_enable; /* firmware format */ const struct firmware *me_fw; /* ME firmware */ @@ -376,15 +429,31 @@ struct amdgpu_gfx { struct amdgpu_ring sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; struct amdgpu_ring_mux muxer; - enum amdgpu_gfx_partition 
partition_mode; - uint32_t num_xcd; + bool cp_gfx_shadow; /* for gfx11 */ + + uint16_t xcc_mask; uint32_t num_xcc_per_xcp; + struct mutex partition_mutex; }; +struct amdgpu_gfx_ras_reg_entry { + struct amdgpu_ras_err_status_reg_entry reg_entry; + enum amdgpu_gfx_ras_mem_id_type mem_id_type; + uint32_t se_num; +}; + +struct amdgpu_gfx_ras_mem_id_entry { + const struct amdgpu_ras_memory_id_entry *mem_id_ent; + uint32_t size; +}; + +#define AMDGPU_GFX_MEMID_ENT(x) {(x), ARRAY_SIZE(x)}, + #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) -#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id))) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id))) #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev)) +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -404,19 +473,21 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq); + struct amdgpu_irq_src *irq, int xcc_id); void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); -void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, - unsigned hpd_size); + unsigned hpd_size, int xcc_id); int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size); -void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev); -int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev); -int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev); + unsigned mqd_size, int xcc_id); +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id); void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev); @@ -425,8 +496,8 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, int pipe, int queue); void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, int *mec, int *pipe, int *queue); -bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, - int pipe, int queue); +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int xcc_id, + int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, @@ -458,4 +529,33 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id) int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); + +bool 
amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id); +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev); +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev); +void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, + void *ras_error_status, + void (*func)(struct amdgpu_device *adev, void *ras_error_status, + int xcc_id)); + +static inline const char *amdgpu_gfx_compute_mode_desc(int mode) +{ + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + return "SPX"; + case AMDGPU_DPX_PARTITION_MODE: + return "DPX"; + case AMDGPU_TPX_PARTITION_MODE: + return "TPX"; + case AMDGPU_QPX_PARTITION_MODE: + return "QPX"; + case AMDGPU_CPX_PARTITION_MODE: + return "CPX"; + default: + return "UNKNOWN"; + } + + return "UNKNOWN"; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 95b0f984acbf..d78bd9732543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -534,22 +534,21 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) * subject to change when ring number changes * Engine 17: Gart flushes */ -#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 -#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 +#define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3 int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, - GFXHUB_FREE_VM_INV_ENGS_BITMAP}; + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0}; unsigned i; unsigned vmhub, inv_eng; - if (adev->enable_mes) { + /* init the vm inv eng for all vmhubs */ + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { + vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP; /* reserve engine 5 for firmware */ - for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) - vm_inv_engs[vmhub] &= ~(1 << 5); + if (adev->enable_mes) + vm_inv_engs[i] &= ~(1 << 5); } for (i = 0; i < adev->num_rings; ++i) { @@ -671,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + hub->ctx_distance * i; - tmp = (hub_type == AMDGPU_GFXHUB_0) ? + tmp = (hub_type == AMDGPU_GFXHUB(0)) ? RREG32_SOC15_IP(GC, reg) : RREG32_SOC15_IP(MMHUB, reg); @@ -680,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, else tmp &= ~hub->vm_cntx_cntl_vm_fault; - (hub_type == AMDGPU_GFXHUB_0) ? + (hub_type == AMDGPU_GFXHUB(0)) ? 
WREG32_SOC15_IP(GC, reg, tmp) : WREG32_SOC15_IP(MMHUB, reg, tmp); } @@ -893,3 +892,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) return 0; } + +static ssize_t current_memory_partition_show( + struct device *dev, struct device_attribute *addr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_memory_partition mode; + + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + switch (mode) { + case AMDGPU_NPS1_PARTITION_MODE: + return sysfs_emit(buf, "NPS1\n"); + case AMDGPU_NPS2_PARTITION_MODE: + return sysfs_emit(buf, "NPS2\n"); + case AMDGPU_NPS3_PARTITION_MODE: + return sysfs_emit(buf, "NPS3\n"); + case AMDGPU_NPS4_PARTITION_MODE: + return sysfs_emit(buf, "NPS4\n"); + case AMDGPU_NPS6_PARTITION_MODE: + return sysfs_emit(buf, "NPS6\n"); + case AMDGPU_NPS8_PARTITION_MODE: + return sysfs_emit(buf, "NPS8\n"); + default: + return sysfs_emit(buf, "UNKNOWN\n"); + } + + return sysfs_emit(buf, "UNKNOWN\n"); +} + +static DEVICE_ATTR_RO(current_memory_partition); + +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev) +{ + if (!adev->gmc.gmc_funcs->query_mem_partition_mode) + return 0; + + return device_create_file(adev->dev, + &dev_attr_current_memory_partition); +} + +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_current_memory_partition); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6d105d7fb98b..56d73fade568 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -63,6 +63,16 @@ struct firmware; +enum amdgpu_memory_partition { + UNKNOWN_MEMORY_PARTITION_MODE = 0, + AMDGPU_NPS1_PARTITION_MODE = 1, + AMDGPU_NPS2_PARTITION_MODE = 2, + AMDGPU_NPS3_PARTITION_MODE = 3, + AMDGPU_NPS4_PARTITION_MODE = 4, + AMDGPU_NPS6_PARTITION_MODE = 6, + AMDGPU_NPS8_PARTITION_MODE = 8, +}; + /* * GMC page fault information */ @@ -119,7 +129,8 @@ struct amdgpu_gmc_funcs { uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via pasid */ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type, bool all_hub); + uint32_t flush_type, bool all_hub, + uint32_t inst); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -137,8 +148,15 @@ struct amdgpu_gmc_funcs { void (*get_vm_pte)(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags); + /* override per-page pte flags */ + void (*override_vm_pte_flags)(struct amdgpu_device *dev, + struct amdgpu_vm *vm, + uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + + enum amdgpu_memory_partition (*query_mem_partition_mode)( + struct amdgpu_device *adev); }; struct amdgpu_xgmi_ras { @@ -164,6 +182,21 @@ struct amdgpu_xgmi { struct amdgpu_xgmi_ras *ras; }; +struct amdgpu_mem_partition_info { + union { + struct { + uint32_t fpfn; + uint32_t lpfn; + } range; + struct { + int node; + } numa; + }; + uint64_t size; +}; + +#define INVALID_PFN -1 + struct amdgpu_gmc { /* FB's physical address in MMIO space (for CPU to * map FB). 
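The current_memory_partition attribute added above (together with the earlier current_compute_partition and available_compute_partition files) is a plain per-device sysfs string. A small userspace sketch of reading it; the path is an assumption — the attributes sit in the DRM device's sysfs directory, typically reachable as /sys/class/drm/cardN/device:

#include <stdio.h>
#include <string.h>

/* Hypothetical helper: read one of the partition attributes, e.g.
 * "current_memory_partition" or "available_compute_partition".
 */
static int read_partition_attr(const char *attr, char *buf, size_t len)
{
        char path[128];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/class/drm/card0/device/%s", attr);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (!fgets(buf, (int)len, f)) {
                fclose(f);
                return -1;
        }
        buf[strcspn(buf, "\n")] = '\0'; /* strip the newline appended by sysfs_emit() */
        fclose(f);
        return 0;
}

Writing a mode string such as "CPX" to current_compute_partition requests a switch, subject to the XCC-count checks in amdgpu_gfx_set_compute_partition() shown earlier.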
This is different compared to the agp/ @@ -250,7 +283,10 @@ struct amdgpu_gmc { uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; bool tmz_enabled; + bool is_app_apu; + struct amdgpu_mem_partition_info *mem_partitions; + uint8_t num_mem_partitions; const struct amdgpu_gmc_funcs *gmc_funcs; struct amdgpu_xgmi xgmi; @@ -265,6 +301,8 @@ struct amdgpu_gmc { /* MALL size */ u64 mall_size; + uint32_t m_half_use; + /* number of UMC instances */ int num_umc; /* mode2 save restore */ @@ -296,14 +334,17 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) -#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ - ((adev), (pasid), (type), (allhub))) + ((adev), (pasid), (type), (allhub), (inst))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) +#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \ + (adev)->gmc.gmc_funcs->override_vm_pte_flags \ + ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) /** @@ -373,4 +414,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 2dadcfe43d03..081267161d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -190,8 +190,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 512MB takes maxmium 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL); + /* Assuming 128MB takes maximum 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL); timeout *= HMM_RANGE_DEFAULT_TIMEOUT; timeout = jiffies + msecs_to_jiffies(timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4ff348e10e4d..ebeddc9a37e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, uint64_t fence_ctx; uint32_t status = 0, alloc_size; unsigned fence_flags = 0; - bool secure; + bool secure, init_shadow; + u64 shadow_va, csa_va, gds_va; + int vmid = AMDGPU_JOB_GET_VMID(job); unsigned i; int r = 0; @@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs, vm = job->vm; fence_ctx = job->base.s_fence ? job->base.s_fence->scheduled.context : 0; + shadow_va = job->shadow_va; + csa_va = job->csa_va; + gds_va = job->gds_va; + init_shadow = job->init_shadow; } else { vm = NULL; fence_ctx = 0; + shadow_va = 0; + csa_va = 0; + gds_va = 0; + init_shadow = false; } if (!ring->sched.ready && !ring->is_mes_queue) { @@ -212,7 +222,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } amdgpu_ring_ib_begin(ring); - if (job && ring->funcs->init_cond_exec) + + if (ring->funcs->emit_gfx_shadow) + amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va, + init_shadow, vmid); + + if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); amdgpu_device_flush_hdp(adev, ring); @@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } + if (ring->funcs->emit_gfx_shadow) { + amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); + + if (ring->funcs->init_cond_exec) { + unsigned ce_offset = ~0; + + ce_offset = amdgpu_ring_init_cond_exec(ring); + if (ce_offset != ~0 && ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, ce_offset); + } + } + r = amdgpu_fence_emit(ring, f, job, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); @@ -436,7 +463,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; seq_printf(m, "--------------------- DELAYED --------------------- \n"); amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index c991ca0b7a1c..ff1ea99292fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub]) { + if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -460,14 +460,11 @@ error: } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) - goto unlock; ++id_mgr->reserved_use_count; if (!id_mgr->reserved) { @@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, list_del_init(&id->list); id_mgr->reserved = id; } - vm->reserved_vmid[vmhub] = true; -unlock: mutex_unlock(&id_mgr->lock); return 0; } void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned vmhub) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub] && - !--id_mgr->reserved_use_count) { + if (!--id_mgr->reserved_use_count) { /* give the reserved ID back to normal round robin */ list_add(&id_mgr->reserved->list, &id_mgr->ids_lru); id_mgr->reserved = NULL; } - vm->reserved_vmid[vmhub] = false; + mutex_unlock(&id_mgr->lock); } @@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } + /* alloc a default reserved vmid to enforce isolation */ + if (enforce_isolation) + 
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); + } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index d1cc09b45da4..fa8c42c83d5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub); + unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub); + unsigned vmhub); int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_job *job, struct dma_fence **fence); void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 1d5af50331e4..fceb3b384955 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -270,7 +270,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp_src = dw[2] >> 31; entry->pasid = dw[3] & 0xffff; - entry->pasid_src = dw[3] >> 31; + entry->node_id = (dw[3] >> 16) & 0xff; entry->src_data[0] = dw[4]; entry->src_data[1] = dw[5]; entry->src_data[2] = dw[6]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index fafebec5b7b6..5273decc5753 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -99,6 +99,21 @@ const char *soc15_ih_clientid_name[] = { "MP1" }; +const int node_id_to_phys_map[NODEID_MAX] = { + [AID0_NODEID] = 0, + [XCD0_NODEID] = 0, + [XCD1_NODEID] = 1, + [AID1_NODEID] = 1, + [XCD2_NODEID] = 2, + [XCD3_NODEID] = 3, + [AID2_NODEID] = 2, + [XCD4_NODEID] = 4, + [XCD5_NODEID] = 5, + [AID3_NODEID] = 3, + [XCD6_NODEID] = 6, + [XCD7_NODEID] = 7, +}; + /** * amdgpu_irq_disable_all - disable *all* interrupts * @@ -109,7 +124,7 @@ const char *soc15_ih_clientid_name[] = { void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; - unsigned i, j, k; + unsigned int i, j, k; int r; spin_lock_irqsave(&adev->irq.lock, irqflags); @@ -124,7 +139,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) continue; for (k = 0; k < src->num_types; ++k) { - atomic_set(&src->enabled_types[k], 0); r = src->funcs->set(adev, src, k, AMDGPU_IRQ_STATE_DISABLE); if (r) @@ -268,11 +282,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev) int nvec = pci_msix_vec_count(adev->pdev); unsigned int flags; - if (nvec <= 0) { + if (nvec <= 0) flags = PCI_IRQ_MSI; - } else { + else flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; - } + /* we only need one vector */ nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); if (nvec > 0) { @@ -331,7 +345,7 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev) */ void amdgpu_irq_fini_sw(struct amdgpu_device *adev) { - unsigned i, j; + unsigned int i, j; for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) @@ -365,7 +379,7 @@ void amdgpu_irq_fini_sw(struct amdgpu_device *adev) * 0 on success or error code otherwise */ int amdgpu_irq_add_id(struct amdgpu_device *adev, - unsigned client_id, unsigned src_id, + unsigned int client_id, unsigned int src_id, struct amdgpu_irq_src *source) { if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) @@ -417,7 +431,7 @@ void 
amdgpu_irq_dispatch(struct amdgpu_device *adev, { u32 ring_index = ih->rptr >> 2; struct amdgpu_iv_entry entry; - unsigned client_id, src_id; + unsigned int client_id, src_id; struct amdgpu_irq_src *src; bool handled = false; int r; @@ -453,7 +467,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, handled = true; } else { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); + DRM_DEBUG("Unregistered interrupt src_id: %d of client_id:%d\n", + src_id, client_id); } /* Send it to amdkfd as well if it isn't already handled */ @@ -492,7 +507,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev, * Updates interrupt state for the specific source (all ASICs). */ int amdgpu_irq_update(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, unsigned type) + struct amdgpu_irq_src *src, unsigned int type) { unsigned long irqflags; enum amdgpu_interrupt_state state; @@ -501,7 +516,8 @@ int amdgpu_irq_update(struct amdgpu_device *adev, spin_lock_irqsave(&adev->irq.lock, irqflags); /* We need to determine after taking the lock, otherwise - we might disable just enabled interrupts again */ + * we might disable just enabled interrupts again + */ if (amdgpu_irq_enabled(adev, src, type)) state = AMDGPU_IRQ_STATE_ENABLE; else @@ -555,7 +571,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) * 0 on success or error code otherwise */ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type) + unsigned int type) { if (!adev->irq.installed) return -ENOENT; @@ -585,7 +601,7 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, * 0 on success or error code otherwise */ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type) + unsigned int type) { if (!adev->irq.installed) return -ENOENT; @@ -619,7 +635,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, * invalid parameters */ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type) + unsigned int type) { if (!adev->irq.installed) return false; @@ -732,7 +748,7 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev) * Returns: * Linux IRQ */ -unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id) +unsigned int amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned int src_id) { adev->irq.virq[src_id] = irq_create_mapping(adev->irq.domain, src_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index be243adf3e65..04c0b4fa17a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -53,7 +53,7 @@ struct amdgpu_iv_entry { uint64_t timestamp; unsigned timestamp_src; unsigned pasid; - unsigned pasid_src; + unsigned node_id; unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW]; const uint32_t *iv_entry; }; @@ -102,6 +102,24 @@ struct amdgpu_irq { bool retry_cam_enabled; }; +enum interrupt_node_id_per_aid { + AID0_NODEID = 0, + XCD0_NODEID = 1, + XCD1_NODEID = 2, + AID1_NODEID = 4, + XCD2_NODEID = 5, + XCD3_NODEID = 6, + AID2_NODEID = 8, + XCD4_NODEID = 9, + XCD5_NODEID = 10, + AID3_NODEID = 12, + XCD6_NODEID = 13, + XCD7_NODEID = 14, + NODEID_MAX, +}; + +extern const int node_id_to_phys_map[NODEID_MAX]; + void amdgpu_irq_disable_all(struct amdgpu_device *adev); int amdgpu_irq_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c3d9d75143f4..78476bc75b4e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -65,6 +65,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", ti.process_name, ti.tgid, ti.task_name, ti.pid); + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); @@ -107,7 +109,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, (*job)->vm = vm; amdgpu_sync_create(&(*job)->explicit_sync); - (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + (*job)->generation = amdgpu_vm_generation(adev, vm); (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET; if (!entity) @@ -256,16 +258,27 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; + /* Ignore soft recovered fences here */ + r = drm_sched_entity_error(s_entity); + if (r && r != -ENODATA) + goto error; + if (!fence && job->gang_submit) fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); while (!fence && job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); - if (r) + if (r) { DRM_ERROR("Error getting VM ID (%d)\n", r); + goto error; + } } return fence; + +error: + dma_fence_set_error(&job->base.s_fence->finished, r); + return NULL; } static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) @@ -282,7 +295,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) trace_amdgpu_sched_run_job(job); /* Skip job if VRAM is lost and never resubmit gangs */ - if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter) || + if (job->generation != amdgpu_vm_generation(adev, job->vm) || (job->job_run_counter && job->gang_submit)) dma_fence_set_error(finished, -ECANCELED); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 52f2e313ea17..a963a25ddd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -61,12 +61,18 @@ struct amdgpu_job { uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; - uint32_t vram_lost_counter; + uint64_t generation; /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; + /* virtual addresses for shadow/GDS/CSA */ + uint64_t shadow_va; + uint64_t csa_va; + uint64_t gds_va; + bool init_shadow; + /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 4fa019c8aefc..3add4b4f0667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -45,13 +45,14 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) { - int i; + int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) + amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } mutex_destroy(&adev->jpeg.jpeg_pg_lock); @@ -76,13 +77,14 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, jpeg.idle_work.work); unsigned int fences = 0; - unsigned int i; + unsigned int i, j; for (i = 0; i < 
adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]); } if (!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) @@ -122,18 +124,21 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev)) return 0; - WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0)); - amdgpu_ring_write(ring, 0xDEADBEEF); + WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD); + /* Add a read register to make sure the write register is executed. */ + RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); + + amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0)); + amdgpu_ring_write(ring, 0xABADCAFE); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); - if (tmp == 0xDEADBEEF) + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); + if (tmp == 0xABADCAFE) break; udelay(1); } @@ -161,8 +166,7 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, - PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -208,7 +212,7 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 1471a1ebb034..ffe47e9f5bf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -26,20 +26,22 @@ #include "amdgpu_ras.h" -#define AMDGPU_MAX_JPEG_INSTANCES 2 +#define AMDGPU_MAX_JPEG_INSTANCES 4 +#define AMDGPU_MAX_JPEG_RINGS 8 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) struct amdgpu_jpeg_reg{ - unsigned jpeg_pitch; + unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; struct amdgpu_jpeg_inst { - struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS]; struct amdgpu_irq_src irq; struct amdgpu_irq_src ras_poison_irq; struct amdgpu_jpeg_reg external; + uint8_t aid_id; }; struct amdgpu_jpeg_ras { @@ -49,6 +51,7 @@ struct amdgpu_jpeg_ras { struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; + unsigned num_jpeg_rings; struct amdgpu_jpeg_reg internal; unsigned harvest_config; struct delayed_work idle_work; @@ -57,6 +60,9 @@ struct amdgpu_jpeg { atomic_t total_submission_cnt; struct ras_common_if *ras_if; struct amdgpu_jpeg_ras *ras; + + uint16_t inst_mask; + uint8_t num_inst_per_aid; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0efb38539d70..e3531aa3c8bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -462,8 +462,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->jpeg.harvest_config & (1 << i)) continue; - if (adev->jpeg.inst[i].ring_dec.sched.ready) - ++num_rings; + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + ++num_rings; } ib_start_alignment = 16; ib_size_alignment = 16; @@ -876,6 +877,19 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->gl2c_cache_size = adev->gfx.config.gc_gl2c_per_gpu; dev_info->mall_size = adev->gmc.mall_size; + + if (adev->gfx.funcs->get_gfx_shadow_info) { + struct amdgpu_gfx_shadow_info shadow_info; + + ret = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info); + if (!ret) { + dev_info->shadow_size = shadow_info.shadow_size; + dev_info->shadow_alignment = shadow_info.shadow_alignment; + dev_info->csa_size = shadow_info.csa_size; + dev_info->csa_alignment = shadow_info.csa_alignment; + } + } + ret = copy_to_user(out, dev_info, min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; kfree(dev_info); @@ -1140,6 +1154,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) kfree(caps); return r; } + case AMDGPU_INFO_MAX_IBS: { + uint32_t max_ibs[AMDGPU_HW_IP_NUM]; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + max_ibs[i] = amdgpu_ring_max_ibs(i); + + return copy_to_user(out, max_ibs, + min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1210,6 +1233,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (r) goto error_pasid; + r = amdgpu_xcp_open_device(adev, fpriv, file_priv); + if (r) + goto error_vm; + r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid); if (r) goto error_vm; @@ -1284,12 +1311,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) amdgpu_vce_free_handles(adev, file_priv); - if (amdgpu_mcbp) { - /* TODO: how to handle reserve failure */ - BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); - amdgpu_vm_bo_del(adev, fpriv->csa_va); + if (fpriv->csa_va) { + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; + + WARN_ON(amdgpu_unmap_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, + fpriv->csa_va, csa_addr)); fpriv->csa_va = NULL; - amdgpu_bo_unreserve(adev->virt.csa_obj); } pasid = fpriv->vm.pasid; @@ -1441,7 +1468,7 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc) static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_query_fw query_fw; struct atom_context *ctx = adev->mode_info.atom_context; @@ -1449,7 +1476,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) int ret, i; static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = { -#define TA_FW_NAME(type) [TA_FW_TYPE_PSP_##type] = #type +#define TA_FW_NAME(type)[TA_FW_TYPE_PSP_##type] = #type TA_FW_NAME(XGMI), TA_FW_NAME(RAS), TA_FW_NAME(HDCP), @@ -1548,7 +1575,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) fw_info.feature, fw_info.ver); /* RLCV */ - query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV; + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLCV; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); if (ret) return ret; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f0f00466b59f..e9091ebfe230 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -924,6 +924,43 @@ error: return r; } +int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr, + uint32_t spi_gdbg_per_vmid_cntl, + const uint32_t *tcp_watch_cntl, + uint32_t flags, + bool trap_en) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes set shader debugger is not supported!\n"); + return -EINVAL; + } + + op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; + op_input.set_shader_debugger.process_context_addr = process_context_addr; + op_input.set_shader_debugger.flags.u32all = flags; + op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; + memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, + sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); + + if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT) >= 14) + op_input.set_shader_debugger.trap_en = trap_en; + + amdgpu_mes_lock(&adev->mes); + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to set_shader_debugger\n"); + + amdgpu_mes_unlock(&adev->mes); + + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, @@ -1305,14 +1342,9 @@ static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings) if (!ring) continue; - r = amdgpu_ring_test_ring(ring); - if (r) { - DRM_DEV_ERROR(ring->adev->dev, - "ring %s test failed (%d)\n", - ring->name, r); + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } else - DRM_INFO("ring %s test pass\n", ring->name); r = amdgpu_ring_test_ib(ring, 1000 * 10); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 547ec35691fa..2d6ac30b7135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -219,6 +219,8 @@ struct mes_add_queue_input { uint32_t gws_size; uint64_t tba_addr; uint64_t tma_addr; + uint32_t trap_en; + uint32_t skip_process_ctx_clear; uint32_t is_kfd_process; uint32_t is_aql_queue; uint32_t queue_size; @@ -256,6 +258,7 @@ enum mes_misc_opcode { MES_MISC_OP_READ_REG, MES_MISC_OP_WRM_REG_WAIT, MES_MISC_OP_WRM_REG_WR_WAIT, + MES_MISC_OP_SET_SHADER_DEBUGGER, }; struct mes_misc_op_input { @@ -278,6 +281,21 @@ struct mes_misc_op_input { uint32_t reg0; uint32_t reg1; } wrm_reg; + + struct { + uint64_t process_context_addr; + union { + struct { + uint64_t single_memop : 1; + uint64_t single_alu_op : 1; + uint64_t reserved: 30; + }; + uint32_t u32all; + } flags; + uint32_t spi_gdbg_per_vmid_cntl; + uint32_t tcp_watch_cntl[4]; + uint32_t trap_en; + } set_shader_debugger; }; }; @@ -340,6 +358,12 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); +int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr, + uint32_t spi_gdbg_per_vmid_cntl, + const uint32_t *tcp_watch_cntl, + uint32_t flags, + bool trap_en); int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index d21bb6dae56e..1ca9d4ed8063 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -21,6 +21,29 @@ #ifndef __AMDGPU_MMHUB_H__ #define __AMDGPU_MMHUB_H__ +enum amdgpu_mmhub_ras_memory_id { + AMDGPU_MMHUB_WGMI_PAGEMEM = 0, + AMDGPU_MMHUB_RGMI_PAGEMEM = 1, + AMDGPU_MMHUB_WDRAM_PAGEMEM = 2, + AMDGPU_MMHUB_RDRAM_PAGEMEM = 3, + AMDGPU_MMHUB_WIO_CMDMEM = 4, + AMDGPU_MMHUB_RIO_CMDMEM = 5, + AMDGPU_MMHUB_WGMI_CMDMEM = 6, + AMDGPU_MMHUB_RGMI_CMDMEM = 7, + AMDGPU_MMHUB_WDRAM_CMDMEM = 8, + AMDGPU_MMHUB_RDRAM_CMDMEM = 9, + AMDGPU_MMHUB_MAM_DMEM0 = 10, + AMDGPU_MMHUB_MAM_DMEM1 = 11, + AMDGPU_MMHUB_MAM_DMEM2 = 12, + AMDGPU_MMHUB_MAM_DMEM3 = 13, + AMDGPU_MMHUB_WRET_TAGMEM = 19, + AMDGPU_MMHUB_RRET_TAGMEM = 20, + AMDGPU_MMHUB_WIO_DATAMEM = 21, + AMDGPU_MMHUB_WGMI_DATAMEM = 22, + AMDGPU_MMHUB_WDRAM_DATAMEM = 23, + AMDGPU_MMHUB_MEMORY_BLOCK_LAST, +}; + struct amdgpu_mmhub_ras { struct amdgpu_ras_block_object ras_block; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index c686ff4bcc39..8ab8ae01f87c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -61,6 +61,7 @@ struct amdgpu_nbio_funcs { u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_hi_offset)(struct amdgpu_device *adev); u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); @@ -95,6 +96,9 @@ struct amdgpu_nbio_funcs { void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); u32 (*get_rom_offset)(struct amdgpu_device *adev); + int (*get_compute_partition_mode)(struct amdgpu_device *adev); + u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, + u32 *supp_modes); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a70103ac0026..f7905bce0de1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -131,15 +131,25 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) u32 c = 0; if (domain & AMDGPU_GEM_DOMAIN_VRAM) { - unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; - - places[c].fpfn = 0; - places[c].lpfn = 0; + unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); + + if (adev->gmc.mem_partitions && mem_id >= 0) { + places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn; + /* + * memory partition range lpfn is inclusive start + size - 1 + * TTM place lpfn is exclusive start + size + */ + places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1; + } else { + places[c].fpfn = 0; + places[c].lpfn = 0; + } places[c].mem_type = TTM_PL_VRAM; places[c].flags = 0; if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - places[c].lpfn = visible_pfn; + places[c].lpfn = min_not_zero(places[c].lpfn, visible_pfn); else places[c].flags |= TTM_PL_FLAG_TOPDOWN; @@ -575,6 +585,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->flags = bp->flags; + if (adev->gmc.mem_partitions) + /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */ + bo->xcp_id = bp->xcp_id_plus1 - 1; + else + /* For GPUs without spatial partitioning */ + bo->xcp_id = 0; + 
if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; @@ -611,7 +628,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); if (unlikely(r)) goto fail_unreserve; @@ -933,7 +950,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; amdgpu_bo_placement_from_domain(bo, domain); for (i = 0; i < bo->placement.num_placement; i++) { - unsigned fpfn, lpfn; + unsigned int fpfn, lpfn; fpfn = min_offset >> PAGE_SHIFT; lpfn = max_offset >> PAGE_SHIFT; @@ -1014,7 +1031,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } } -static const char *amdgpu_vram_names[] = { +static const char * const amdgpu_vram_names[] = { "UNKNOWN", "GDDR1", "DDR2", @@ -1042,7 +1059,7 @@ static const char *amdgpu_vram_names[] = { int amdgpu_bo_init(struct amdgpu_device *adev) { /* On A+A platform, VRAM can be mapped as WB */ - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); @@ -1078,8 +1095,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) amdgpu_ttm_fini(adev); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { arch_phys_wc_del(adev->gmc.vram_mtrr); arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } @@ -1146,8 +1162,8 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) * Returns: * 0 for success or a negative error code on failure. 
*/ -int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, - uint32_t metadata_size, uint64_t flags) +int amdgpu_bo_set_metadata(struct amdgpu_bo *bo, void *metadata, + u32 metadata_size, uint64_t flags) { struct amdgpu_bo_user *ubo; void *buffer; @@ -1266,8 +1282,12 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { - unsigned int domain; uint64_t size = amdgpu_bo_size(bo); + unsigned int domain; + + /* Abort if the BO doesn't currently have a backing store */ + if (!bo->tbo.resource) + return; domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { @@ -1336,7 +1356,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) return; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true); if (!WARN_ON(r)) { amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 35b8106816a1..05496b97ef93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -56,6 +56,8 @@ struct amdgpu_bo_param { bool no_wait_gpu; struct dma_resv *resv; void (*destroy)(struct ttm_buffer_object *bo); + /* xcp partition number plus 1, 0 means any partition */ + int8_t xcp_id_plus1; }; /* bo virtual addresses in a vm */ @@ -108,6 +110,13 @@ struct amdgpu_bo { struct mmu_interval_notifier notifier; #endif struct kgd_mem *kfd_bo; + + /* + * For GPUs with spatial partitioning, xcp partition number, -1 means + * any partition. For other ASICs without spatial partition, always 0 + * for memory accounting. 
+ */ + int8_t xcp_id; }; struct amdgpu_bo_user { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a150b7a4b4aa..e15c27e05564 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -146,6 +146,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 0): adev->virt.autoload_ucode_id = 0; break; + case IP_VERSION(13, 0, 6): + ret = psp_init_cap_microcode(psp, ucode_prefix); + break; case IP_VERSION(13, 0, 10): adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; ret = psp_init_cap_microcode(psp, ucode_prefix); @@ -329,6 +332,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, bool ret = false; int i; + if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) + return false; + db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET; db_dir_pos = db_header_pos + sizeof(struct psp_runtime_data_header); @@ -411,7 +417,7 @@ static int psp_sw_init(void *handle) if ((psp_get_runtime_db_entry(adev, PSP_RUNTIME_ENTRY_TYPE_PPTABLE_ERR_STATUS, &scpm_entry)) && - (SCPM_DISABLE != scpm_entry.scpm_status)) { + (scpm_entry.scpm_status != SCPM_DISABLE)) { adev->scpm_enabled = true; adev->scpm_status = scpm_entry.scpm_status; } else { @@ -458,10 +464,9 @@ static int psp_sw_init(void *handle) if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) { - ret= psp_sysfs_init(adev); - if (ret) { + ret = psp_sysfs_init(adev); + if (ret) return ret; - } } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, @@ -474,7 +479,8 @@ static int psp_sw_init(void *handle) return ret; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); @@ -482,7 +488,8 @@ static int psp_sw_init(void *handle) goto failed1; ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); if (ret) @@ -520,6 +527,8 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + psp_free_shared_bufs(psp); + if (psp->km_ring.ring_mem) amdgpu_bo_free_kernel(&adev->firmware.rbuf, &psp->km_ring.ring_mem_mc_addr, @@ -560,6 +569,26 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, return -ETIME; } +int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, + uint32_t reg_val, uint32_t mask, uint32_t msec_timeout) +{ + uint32_t val; + int i; + struct amdgpu_device *adev = psp->adev; + + if (psp->adev->no_hw_access) + return 0; + + for (i = 0; i < msec_timeout; i++) { + val = RREG32(reg_index); + if ((val & mask) == reg_val) + return 0; + msleep(1); + } + + return -ETIME; +} + static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id) { switch (cmd_id) { @@ -643,7 +672,7 @@ psp_cmd_submit_buf(struct psp_context *psp, skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED || psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev); - memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp)); + memcpy(&cmd->resp, &psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp)); /* In some cases, psp response status is not 0 even there is no * problem while the command is submitted. 
Some version of PSP FW @@ -699,8 +728,13 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp, uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) { struct amdgpu_device *adev = psp->adev; - uint32_t size = amdgpu_bo_size(tmr_bo); - uint64_t tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo); + uint32_t size = 0; + uint64_t tmr_pa = 0; + + if (tmr_bo) { + size = amdgpu_bo_size(tmr_bo); + tmr_pa = amdgpu_gmc_vram_pa(adev, tmr_bo); + } if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; @@ -745,6 +779,16 @@ static int psp_load_toc(struct psp_context *psp, return ret; } +static bool psp_boottime_tmr(struct psp_context *psp) +{ + switch (psp->adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 6): + return true; + default: + return false; + } +} + /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { @@ -816,8 +860,9 @@ static int psp_tmr_load(struct psp_context *psp) cmd = acquire_psp_cmd_buf(psp); psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); - DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", - amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); + if (psp->tmr_bo) + DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -828,7 +873,7 @@ static int psp_tmr_load(struct psp_context *psp) } static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp, - struct psp_gfx_cmd_resp *cmd) + struct psp_gfx_cmd_resp *cmd) { if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR; @@ -969,6 +1014,27 @@ static int psp_rl_load(struct amdgpu_device *adev) return ret; } +int psp_spatial_partition(struct psp_context *psp, int mode) +{ + struct psp_gfx_cmd_resp *cmd; + int ret; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = acquire_psp_cmd_buf(psp); + + cmd->cmd_id = GFX_CMD_ID_SRIOV_SPATIAL_PART; + cmd->cmd.cmd_spatial_part.mode = mode; + + dev_info(psp->adev->dev, "Requesting %d partitions through PSP", mode); + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + + release_psp_cmd_buf(psp); + + return ret; +} + static int psp_asd_initialize(struct psp_context *psp) { int ret; @@ -1065,7 +1131,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, struct ta_context *context) { cmd->cmd_id = context->ta_load_type; - cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc); cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); cmd->cmd.cmd_load_ta.app_len = context->bin_desc.size_bytes; @@ -1136,9 +1202,8 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context) context->resp_status = cmd->resp.status; - if (!ret) { + if (!ret) context->session_id = cmd->resp.session_id; - } release_psp_cmd_buf(psp); @@ -1254,8 +1319,9 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) { - return psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) && - psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b; + return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) && + psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) || + psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6); } /* @@ -1363,6 +1429,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { struct 
ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + bool requires_reflection = + (psp->xgmi_context.supports_extended_data && get_extended_data) || + psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; @@ -1377,11 +1446,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, topology->nodes[i].num_links = get_extended_data ? topology->nodes[i].num_links + link_info_output->nodes[i].num_links : - link_info_output->nodes[i].num_links; + ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : + link_info_output->nodes[i].num_links); /* reflect the topology information for bi-directionality */ - if (psp->xgmi_context.supports_extended_data && - get_extended_data && topology->nodes[i].num_hops) + if (requires_reflection && topology->nodes[i].num_hops) psp_xgmi_reflect_topology_info(psp, topology->nodes[i]); } } @@ -1465,8 +1534,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_ras_intr_triggered()) return ret; - if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) - { + if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) { DRM_WARN("RAS: Unsupported Interface"); return -EINVAL; } @@ -1476,8 +1544,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) dev_warn(psp->adev->dev, "ECC switch disabled\n"); ras_cmd->ras_status = TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE; - } - else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag) + } else if (ras_cmd->ras_out_message.flags.reg_access_failure_flag) dev_warn(psp->adev->dev, "RAS internal register access blocked\n"); @@ -1573,11 +1640,10 @@ int psp_ras_initialize(struct psp_context *psp) if (ret) dev_warn(adev->dev, "PSP set boot config failed\n"); else - dev_warn(adev->dev, "GECC will be disabled in next boot cycle " - "if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n"); + dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n"); } } else { - if (1 == boot_cfg) { + if (boot_cfg == 1) { dev_info(adev->dev, "GECC is enabled\n"); } else { /* enable GECC in next boot cycle if it is disabled @@ -1607,8 +1673,11 @@ int psp_ras_initialize(struct psp_context *psp) if (amdgpu_ras_is_poison_mode_supported(adev)) ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; - if (!adev->gmc.xgmi.connected_to_cpu) + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + ras_cmd->ras_in_message.init_flags.xcc_mask = + adev->gfx.xcc_mask; + ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2; ret = psp_ta_load(psp, &psp->ras_context.context); @@ -1626,14 +1695,37 @@ int psp_ras_initialize(struct psp_context *psp) } int psp_ras_trigger_error(struct psp_context *psp, - struct ta_ras_trigger_error_input *info) + struct ta_ras_trigger_error_input *info, uint32_t instance_mask) { struct ta_ras_shared_memory *ras_cmd; + struct amdgpu_device *adev = psp->adev; int ret; + uint32_t dev_mask; if (!psp->ras_context.context.initialized) return -EINVAL; + switch (info->block_id) { + case TA_RAS_BLOCK__GFX: + dev_mask = GET_MASK(GC, instance_mask); + break; + case TA_RAS_BLOCK__SDMA: + dev_mask = GET_MASK(SDMA0, instance_mask); + break; + case TA_RAS_BLOCK__VCN: + case TA_RAS_BLOCK__JPEG: + dev_mask = GET_MASK(VCN, instance_mask); + break; + default: + dev_mask = instance_mask; + break; + } + + /* reuse sub_block_index for backward compatibility */ + dev_mask <<= 
AMDGPU_RAS_INST_SHIFT; + dev_mask &= AMDGPU_RAS_INST_MASK; + info->sub_block_index |= dev_mask; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); @@ -2077,10 +2169,12 @@ static int psp_hw_start(struct psp_context *psp) if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; - ret = psp_tmr_init(psp); - if (ret) { - DRM_ERROR("PSP tmr init failed!\n"); - return ret; + if (!psp_boottime_tmr(psp)) { + ret = psp_tmr_init(psp); + if (ret) { + DRM_ERROR("PSP tmr init failed!\n"); + return ret; + } } skip_pin_bo: @@ -2363,7 +2457,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, } static int psp_execute_non_psp_fw_load(struct psp_context *psp, - struct amdgpu_firmware_info *ucode) + struct amdgpu_firmware_info *ucode) { int ret = 0; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); @@ -2402,9 +2496,8 @@ static int psp_load_smu_fw(struct psp_context *psp) (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); - if (ret) { + if (ret) DRM_WARN("Failed to set MP1 state prepare for reload\n"); - } } ret = psp_execute_non_psp_fw_load(psp, ucode); @@ -2655,8 +2748,6 @@ static int psp_hw_fini(void *handle) psp_ring_destroy(psp, PSP_RING_TYPE__KM); - psp_free_shared_bufs(psp); - return 0; } @@ -2716,9 +2807,8 @@ static int psp_suspend(void *handle) } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); - if (ret) { + if (ret) DRM_ERROR("PSP ring stop failed\n"); - } out: return ret; @@ -2967,7 +3057,7 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->sos.fw_version = le32_to_cpu(desc->fw_version); psp->sos.feature_version = le32_to_cpu(desc->fw_version); psp->sos.size_bytes = le32_to_cpu(desc->size_bytes); - psp->sos.start_addr = ucode_start_addr; + psp->sos.start_addr = ucode_start_addr; break; case PSP_FW_TYPE_PSP_SYS_DRV: psp->sys.fw_version = le32_to_cpu(desc->fw_version); @@ -3491,7 +3581,7 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size drm_dev_exit(idx); } -static DEVICE_ATTR(usbc_pd_fw, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(usbc_pd_fw, 0644, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); @@ -3621,6 +3711,7 @@ int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) switch (adev->ip_versions[MP0_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): if (!psp->adev) { psp->adev = adev; psp_v13_0_set_psp_funcs(psp); @@ -3676,8 +3767,7 @@ static void psp_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_usbc_pd_fw); } -const struct amdgpu_ip_block_version psp_v3_1_ip_block = -{ +const struct amdgpu_ip_block_version psp_v3_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, .major = 3, .minor = 1, @@ -3685,8 +3775,7 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block = .funcs = &psp_ip_funcs, }; -const struct amdgpu_ip_block_version psp_v10_0_ip_block = -{ +const struct amdgpu_ip_block_version psp_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, .major = 10, .minor = 0, @@ -3694,8 +3783,7 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block = .funcs = &psp_ip_funcs, }; -const struct amdgpu_ip_block_version psp_v11_0_ip_block = -{ +const struct amdgpu_ip_block_version psp_v11_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, .major = 11, .minor = 0, @@ -3711,8 +3799,7 @@ const struct amdgpu_ip_block_version 
psp_v11_0_8_ip_block = { .funcs = &psp_ip_funcs, }; -const struct amdgpu_ip_block_version psp_v12_0_ip_block = -{ +const struct amdgpu_ip_block_version psp_v12_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, .major = 12, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index cf4f60c66122..2cae0b1a0b8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -455,6 +455,8 @@ extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); +extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, uint32_t msec_timeout); int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, @@ -486,7 +488,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); int psp_ras_trigger_error(struct psp_context *psp, - struct ta_ras_trigger_error_input *info); + struct ta_ras_trigger_error_input *info, uint32_t instance_mask); int psp_ras_terminate(struct psp_context *psp); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -519,6 +521,8 @@ int psp_load_fw_list(struct psp_context *psp, struct amdgpu_firmware_info **ucode_list, int ucode_count); void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size); +int psp_spatial_partition(struct psp_context *psp, int mode); + int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_sysfs_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 3ab8a88789c8..4769a18304d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -171,8 +171,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); err_data.err_addr = &err_rec; - amdgpu_umc_fill_error_record(&err_data, address, - (address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0); + amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0); if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -256,6 +255,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, int block_id; uint32_t sub_block; u64 address, value; + /* default value is 0 if the mask is not set by user */ + u32 instance_mask = 0; if (*pos) return -EINVAL; @@ -306,7 +307,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->op = op; if (op == 2) { - if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", + if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx 0x%x", + &sub_block, &address, &value, &instance_mask) != 4 && + sscanf(str, "%*s %*s %*s %u %llu %llu %u", + &sub_block, &address, &value, &instance_mask) != 4 && + sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", &sub_block, &address, &value) != 3 && sscanf(str, "%*s %*s %*s %u %llu %llu", &sub_block, &address, &value) != 3) @@ -314,6 +319,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, data->head.sub_block_index = sub_block; data->inject.address = address; data->inject.value = value; + data->inject.instance_mask = instance_mask; } } else { if (size < sizeof(*data)) @@ -326,6 +332,46 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct 
file *f, return 0; } +static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev, + struct ras_debug_if *data) +{ + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; + uint32_t mask, inst_mask = data->inject.instance_mask; + + /* no need to set instance mask if there is only one instance */ + if (num_xcc <= 1 && inst_mask) { + data->inject.instance_mask = 0; + dev_dbg(adev->dev, + "RAS inject mask(0x%x) isn't supported and force it to 0.\n", + inst_mask); + + return; + } + + switch (data->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + mask = GENMASK(num_xcc - 1, 0); + break; + case AMDGPU_RAS_BLOCK__SDMA: + mask = GENMASK(adev->sdma.num_instances - 1, 0); + break; + case AMDGPU_RAS_BLOCK__VCN: + case AMDGPU_RAS_BLOCK__JPEG: + mask = GENMASK(adev->vcn.num_vcn_inst - 1, 0); + break; + default: + mask = inst_mask; + break; + } + + /* remove invalid bits in instance mask */ + data->inject.instance_mask &= mask; + if (inst_mask != data->inject.instance_mask) + dev_dbg(adev->dev, + "Adjust RAS inject mask 0x%x to 0x%x\n", + inst_mask, data->inject.instance_mask); +} + /** * DOC: AMDGPU RAS debugfs control interface * @@ -341,7 +387,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * sub_block_index: some IPs have subcomponets. say, GFX, sDMA. * name: the name of IP. * - * inject has two more members than head, they are address, value. + * inject has three more members than head, they are address, value and mask. * As their names indicate, inject operation will write the * value to the address. * @@ -365,7 +411,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl - * echo "inject <block> <error> <sub-block> <address> <value> > /sys/kernel/debug/dri/<N>/ras/ras_ctrl + * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl * * Where N, is the card which you want to affect. * @@ -382,13 +428,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. + * The mask means instance mask, is optional, default value is 0x1. * * For instance, * * .. code-block:: bash * * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl - * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * * How to check the result of the operation? 
@@ -442,7 +489,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - if ((data.inject.address >= adev->gmc.mc_vram_size) || + if ((data.inject.address >= adev->gmc.mc_vram_size && + adev->gmc.mc_vram_size) || (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { dev_warn(adev->dev, "RAS WARN: input address " "0x%llx is invalid.", @@ -460,6 +508,8 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, break; } + amdgpu_ras_instance_mask_check(adev, &data); + /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); break; @@ -1115,15 +1165,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, block_info.address); } - if (info->head.block == AMDGPU_RAS_BLOCK__GFX) { - if (block_obj->hw_ops->ras_error_inject) - ret = block_obj->hw_ops->ras_error_inject(adev, info); + if (block_obj->hw_ops->ras_error_inject) { + if (info->head.block == AMDGPU_RAS_BLOCK__GFX) + ret = block_obj->hw_ops->ras_error_inject(adev, info, info->instance_mask); + else /* Special ras_error_inject is defined (e.g: xgmi) */ + ret = block_obj->hw_ops->ras_error_inject(adev, &block_info, + info->instance_mask); } else { - /* If defined special ras_error_inject(e.g: xgmi), implement special ras_error_inject */ - if (block_obj->hw_ops->ras_error_inject) - ret = block_obj->hw_ops->ras_error_inject(adev, &block_info); - else /*If not defined .ras_error_inject, use default ras_error_inject*/ - ret = psp_ras_trigger_error(&adev->psp, &block_info); + /* default path */ + ret = psp_ras_trigger_error(&adev->psp, &block_info, info->instance_mask); } if (ret) @@ -1441,6 +1491,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *eeprom = &con->eeprom_control; struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *dir; @@ -1451,6 +1502,7 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &amdgpu_ras_debugfs_eeprom_ops); debugfs_create_u32("bad_page_cnt_threshold", 0444, dir, &con->bad_page_cnt_threshold); + debugfs_create_u32("ras_num_recs", 0444, dir, &eeprom->ras_num_recs); debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled); debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled); debugfs_create_file("ras_eeprom_size", S_IRUGO, dir, adev, @@ -1597,8 +1649,7 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) { /* Fatal error events are handled on host side */ - if (amdgpu_sriov_vf(adev) || - !amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) + if (amdgpu_sriov_vf(adev)) return; if (adev->nbio.ras && @@ -1636,8 +1687,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - if (!adev->gmc.xgmi.connected_to_cpu) - amdgpu_umc_poison_handler(adev, false); + amdgpu_umc_poison_handler(adev, false); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -2008,9 +2058,15 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) /* Perform full reset in fatal error mode */ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - else + else { 
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) { + ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET; + reset_context.method = AMD_RESET_METHOD_MODE2; + } + } + amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } atomic_set(&ras->in_recovery, 0); @@ -2259,7 +2315,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) atomic_set(&con->in_recovery, 0); con->eeprom_control.bad_channel_bitmap = 0; - max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); + max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); /* Todo: During test the SMU might fail to read the eeprom through I2C @@ -2396,11 +2452,10 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { adev->ras_hw_enabled = adev->ras_enabled = 0; - if (!adev->is_atom_fw || - !amdgpu_ras_asic_supported(adev)) + if (!amdgpu_ras_asic_supported(adev)) return; - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | @@ -2625,7 +2680,8 @@ release_con: int amdgpu_persistent_edc_harvesting_supported(struct amdgpu_device *adev) { - if (adev->gmc.xgmi.connected_to_cpu) + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) return 1; return 0; } @@ -3104,3 +3160,143 @@ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, return 0; } + +void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name) +{ + if (!err_type_name) + return; + + switch (err_type) { + case AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE: + sprintf(err_type_name, "correctable"); + break; + case AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE: + sprintf(err_type_name, "uncorrectable"); + break; + default: + sprintf(err_type_name, "unknown"); + break; + } +} + +bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_entry, + uint32_t instance, + uint32_t *memory_id) +{ + uint32_t err_status_lo_data, err_status_lo_offset; + + if (!reg_entry) + return false; + + err_status_lo_offset = + AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance, + reg_entry->seg_lo, reg_entry->reg_lo); + err_status_lo_data = RREG32(err_status_lo_offset); + + if ((reg_entry->flags & AMDGPU_RAS_ERR_STATUS_VALID) && + !REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, ERR_STATUS_VALID_FLAG)) + return false; + + *memory_id = REG_GET_FIELD(err_status_lo_data, ERR_STATUS_LO, MEMORY_ID); + + return true; +} + +bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_entry, + uint32_t instance, + unsigned long *err_cnt) +{ + uint32_t err_status_hi_data, err_status_hi_offset; + + if (!reg_entry) + return false; + + err_status_hi_offset = + AMDGPU_RAS_REG_ENTRY_OFFSET(reg_entry->hwip, instance, + reg_entry->seg_hi, reg_entry->reg_hi); + err_status_hi_data = RREG32(err_status_hi_offset); + + if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) && + !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG)) + /* keep the check here in case we need to refer to the result later */ + dev_dbg(adev->dev, "Invalid err_info field\n"); + + /* read err count */ + *err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT); + + return true; +} + +void 
amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_list, + uint32_t reg_list_size, + const struct amdgpu_ras_memory_id_entry *mem_list, + uint32_t mem_list_size, + uint32_t instance, + uint32_t err_type, + unsigned long *err_count) +{ + uint32_t memory_id; + unsigned long err_cnt; + char err_type_name[16]; + uint32_t i, j; + + for (i = 0; i < reg_list_size; i++) { + /* query memory_id from err_status_lo */ + if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i], + instance, &memory_id)) + continue; + + /* query err_cnt from err_status_hi */ + if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i], + instance, &err_cnt) || + !err_cnt) + continue; + + *err_count += err_cnt; + + /* log the errors */ + amdgpu_ras_get_error_type_name(err_type, err_type_name); + if (!mem_list) { + /* memory_list is not supported */ + dev_info(adev->dev, + "%ld %s hardware errors detected in %s, instance: %d, memory_id: %d\n", + err_cnt, err_type_name, + reg_list[i].block_name, + instance, memory_id); + } else { + for (j = 0; j < mem_list_size; j++) { + if (memory_id == mem_list[j].memory_id) { + dev_info(adev->dev, + "%ld %s hardware errors detected in %s, instance: %d, memory block: %s\n", + err_cnt, err_type_name, + reg_list[i].block_name, + instance, mem_list[j].name); + break; + } + } + } + } +} + +void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_list, + uint32_t reg_list_size, + uint32_t instance) +{ + uint32_t err_status_lo_offset, err_status_hi_offset; + uint32_t i; + + for (i = 0; i < reg_list_size; i++) { + err_status_lo_offset = + AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance, + reg_list[i].seg_lo, reg_list[i].reg_lo); + err_status_hi_offset = + AMDGPU_RAS_REG_ENTRY_OFFSET(reg_list[i].hwip, instance, + reg_list[i].seg_hi, reg_list[i].reg_hi); + WREG32(err_status_lo_offset, 0); + WREG32(err_status_hi_offset, 0); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 17b3d1992e80..46bf1889a9d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -32,6 +32,11 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) +/* position of instance value in sub_block_index of + * ta_ras_trigger_error_input, the sub block uses lower 12 bits + */ +#define AMDGPU_RAS_INST_MASK 0xfffff000 +#define AMDGPU_RAS_INST_SHIFT 0xc enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, @@ -314,6 +319,45 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +/* ras error status reisger fields */ +#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 +#define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L +#define ERR_STATUS_LO__MEMORY_ID__SHIFT 0x18 +#define ERR_STATUS_LO__MEMORY_ID_MASK 0xFF000000L +#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG__SHIFT 0x2 +#define ERR_STATUS_HI__ERR_INFO_VALID_FLAG_MASK 0x00000004L +#define ERR_STATUS__ERR_CNT__SHIFT 0x17 +#define ERR_STATUS__ERR_CNT_MASK 0x03800000L + +#define AMDGPU_RAS_REG_ENTRY(ip, inst, reg_lo, reg_hi) \ + ip##_HWIP, inst, reg_lo##_BASE_IDX, reg_lo, reg_hi##_BASE_IDX, reg_hi + +#define AMDGPU_RAS_REG_ENTRY_OFFSET(hwip, ip_inst, segment, reg) \ + (adev->reg_offset[hwip][ip_inst][segment] + (reg)) + +#define AMDGPU_RAS_ERR_INFO_VALID (1 << 0) +#define AMDGPU_RAS_ERR_STATUS_VALID (1 << 1) +#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2) + +#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0) + +struct
amdgpu_ras_err_status_reg_entry { + uint32_t hwip; + uint32_t ip_inst; + uint32_t seg_lo; + uint32_t reg_lo; + uint32_t seg_hi; + uint32_t reg_hi; + uint32_t reg_inst; + uint32_t flags; + const char *block_name; +}; + +struct amdgpu_ras_memory_id_entry { + uint32_t memory_id; + const char *name; +}; + struct ras_common_if { enum amdgpu_ras_block block; enum amdgpu_ras_error_type type; @@ -385,6 +429,9 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; + + /* Record special requirements of gpu reset caller */ + uint32_t gpu_reset_flags; }; struct ras_fs_data { @@ -471,6 +518,7 @@ struct ras_inject_if { struct ras_common_if head; uint64_t address; uint64_t value; + uint32_t instance_mask; }; struct ras_cure_if { @@ -508,7 +556,8 @@ struct amdgpu_ras_block_object { }; struct amdgpu_ras_block_hw_ops { - int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + int (*ras_error_inject)(struct amdgpu_device *adev, + void *inject_if, uint32_t instance_mask); void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_status)(struct amdgpu_device *adev); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); @@ -696,4 +745,25 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); +void amdgpu_ras_get_error_type_name(uint32_t err_type, char *err_type_name); +bool amdgpu_ras_inst_get_memory_id_field(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_entry, + uint32_t instance, + uint32_t *memory_id); +bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_entry, + uint32_t instance, + unsigned long *err_cnt); +void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_list, + uint32_t reg_list_size, + const struct amdgpu_ras_memory_id_entry *mem_list, + uint32_t mem_list_size, + uint32_t instance, + uint32_t err_type, + unsigned long *err_count); +void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, + const struct amdgpu_ras_err_status_reg_entry *reg_list, + uint32_t reg_list_size, + uint32_t instance); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index c2c2a7718613..0648dfe559af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -68,11 +68,24 @@ /* Table hdr is 'AMDR' */ #define RAS_TABLE_HDR_VAL 0x414d4452 -#define RAS_TABLE_VER 0x00010000 /* Bad GPU tag ‘BADG’ */ #define RAS_TABLE_HDR_BAD 0x42414447 +/* + * EEPROM Table structure v1 + * --------------------------------- + * | | + * | EEPROM TABLE HEADER | + * | ( size 20 Bytes ) | + * | | + * --------------------------------- + * | | + * | BAD PAGE RECORD AREA | + * | | + * --------------------------------- + */ + /* Assume 2-Mbit size EEPROM and take up the whole space. 
*/ #define RAS_TBL_SIZE_BYTES (256 * 1024) #define RAS_TABLE_START 0 @@ -81,6 +94,35 @@ #define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \ / RAS_TABLE_RECORD_SIZE) +/* + * EEPROM Table structrue v2.1 + * --------------------------------- + * | | + * | EEPROM TABLE HEADER | + * | ( size 20 Bytes ) | + * | | + * --------------------------------- + * | | + * | EEPROM TABLE RAS INFO | + * | (available info size 4 Bytes) | + * | ( reserved size 252 Bytes ) | + * | | + * --------------------------------- + * | | + * | BAD PAGE RECORD AREA | + * | | + * --------------------------------- + */ + +/* EEPROM Table V2_1 */ +#define RAS_TABLE_V2_1_INFO_SIZE 256 +#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE +#define RAS_RECORD_START_V2_1 (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \ + RAS_TABLE_V2_1_INFO_SIZE) +#define RAS_MAX_RECORD_COUNT_V2_1 ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \ + RAS_TABLE_V2_1_INFO_SIZE) \ + / RAS_TABLE_RECORD_SIZE) + /* Given a zero-based index of an EEPROM RAS record, yields the EEPROM * offset off of RAS_TABLE_START. That is, this is something you can * add to control->i2c_address, and then tell I2C layer to read @@ -103,6 +145,10 @@ #define RAS_NUM_RECS(_tbl_hdr) (((_tbl_hdr)->tbl_size - \ RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE) +#define RAS_NUM_RECS_V2_1(_tbl_hdr) (((_tbl_hdr)->tbl_size - \ + RAS_TABLE_HEADER_SIZE - \ + RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE) + #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) @@ -230,6 +276,69 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control) return res; } +static void +__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + u32 tmp; + + tmp = ((uint32_t)(rai->rma_status) & 0xFF) | + (((uint32_t)(rai->health_percent) << 8) & 0xFF00) | + (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000); + pp[0] = cpu_to_le32(tmp); +} + +static void +__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai, + unsigned char *buf) +{ + u32 *pp = (uint32_t *)buf; + u32 tmp; + + tmp = le32_to_cpu(pp[0]); + rai->rma_status = tmp & 0xFF; + rai->health_percent = (tmp >> 8) & 0xFF; + rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF; +} + +static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + u8 *buf; + int res; + + buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); + if (!buf) { + DRM_ERROR("Failed to alloc buf to write table ras info\n"); + return -ENOMEM; + } + + __encode_table_ras_info_to_buf(&control->tbl_rai, buf); + + /* i2c may be unstable in gpu reset */ + down_read(&adev->reset_domain->sem); + res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus, + control->i2c_address + + control->ras_info_offset, + buf, RAS_TABLE_V2_1_INFO_SIZE); + up_read(&adev->reset_domain->sem); + + if (res < 0) { + DRM_ERROR("Failed to write EEPROM table ras info:%d", res); + } else if (res < RAS_TABLE_V2_1_INFO_SIZE) { + DRM_ERROR("Short write:%d out of %d\n", + res, RAS_TABLE_V2_1_INFO_SIZE); + res = -EIO; + } else { + res = 0; + } + + kfree(buf); + + return res; +} + static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control) { int ii; @@ -246,6 +355,21 @@ static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control) return csum; } +static u8 
__calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control) +{ + int ii; + u8 *pp, csum; + size_t sz; + + sz = sizeof(control->tbl_rai); + pp = (u8 *) &control->tbl_rai; + csum = 0; + for (ii = 0; ii < sz; ii++, pp++) + csum += *pp; + + return csum; +} + static int amdgpu_ras_eeprom_correct_header_tag( struct amdgpu_ras_eeprom_control *control, uint32_t header) @@ -282,6 +406,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); u8 csum; int res; @@ -289,14 +414,37 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) mutex_lock(&control->ras_tbl_mutex); hdr->header = RAS_TABLE_HDR_VAL; - hdr->version = RAS_TABLE_VER; - hdr->first_rec_offset = RAS_RECORD_START; - hdr->tbl_size = RAS_TABLE_HEADER_SIZE; + if (adev->umc.ras && + adev->umc.ras->set_eeprom_table_version) + adev->umc.ras->set_eeprom_table_version(hdr); + else + hdr->version = RAS_TABLE_VER_V1; + + if (hdr->version == RAS_TABLE_VER_V2_1) { + hdr->first_rec_offset = RAS_RECORD_START_V2_1; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE + + RAS_TABLE_V2_1_INFO_SIZE; + rai->rma_status = GPU_HEALTH_USABLE; + /** + * GPU health represented as a percentage. + * 0 means worst health, 100 means fully health. + */ + rai->health_percent = 100; + /* ecc_page_threshold = 0 means disable bad page retirement */ + rai->ecc_page_threshold = con->bad_page_cnt_threshold; + } else { + hdr->first_rec_offset = RAS_RECORD_START; + hdr->tbl_size = RAS_TABLE_HEADER_SIZE; + } csum = __calc_hdr_byte_sum(control); + if (hdr->version == RAS_TABLE_VER_V2_1) + csum += __calc_ras_info_byte_sum(control); csum = -csum; hdr->checksum = csum; res = __write_table_header(control); + if (!res && hdr->version > RAS_TABLE_VER_V1) + res = __write_table_ras_info(control); control->ras_num_recs = 0; control->ras_fri = 0; @@ -573,11 +721,19 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) "Saved bad pages %d reaches threshold value %d\n", control->ras_num_recs, ras->bad_page_cnt_threshold); control->tbl_hdr.header = RAS_TABLE_HDR_BAD; + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { + control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; + control->tbl_rai.health_percent = 0; + } } - control->tbl_hdr.version = RAS_TABLE_VER; - control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri); - control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + + RAS_TABLE_V2_1_INFO_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + else + control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; control->tbl_hdr.checksum = 0; buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE; @@ -606,6 +762,17 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) goto Out; } + /** + * bad page records have been stored in eeprom, + * now calculate gpu health percent + */ + if (amdgpu_bad_page_threshold != 0 && + control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && + control->ras_num_recs < ras->bad_page_cnt_threshold) + control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - + control->ras_num_recs) * 100) 
/ + ras->bad_page_cnt_threshold; + /* Recalc the checksum. */ csum = 0; @@ -613,10 +780,14 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) csum += *pp; csum += __calc_hdr_byte_sum(control); + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + csum += __calc_ras_info_byte_sum(control); /* avoid sign extension when assigning to "checksum" */ csum = -csum; control->tbl_hdr.checksum = csum; res = __write_table_header(control); + if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1) + res = __write_table_ras_info(control); Out: kfree(buf); return res; @@ -807,9 +978,12 @@ Out: return res; } -uint32_t amdgpu_ras_eeprom_max_record_count(void) +uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control) { - return RAS_MAX_RECORD_COUNT; + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + return RAS_MAX_RECORD_COUNT_V2_1; + else + return RAS_MAX_RECORD_COUNT; } static ssize_t @@ -1051,8 +1225,14 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control int buf_size, res; u8 csum, *buf, *pp; - buf_size = RAS_TABLE_HEADER_SIZE + - control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + buf_size = RAS_TABLE_HEADER_SIZE + + RAS_TABLE_V2_1_INFO_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + else + buf_size = RAS_TABLE_HEADER_SIZE + + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; + buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) { DRM_ERROR("Out of memory checking RAS table checksum.\n"); @@ -1080,6 +1260,39 @@ Out: return res < 0 ? res : csum; } +static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai; + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char *buf; + int res; + + buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL); + if (!buf) { + DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n"); + return -ENOMEM; + } + + /** + * EEPROM table V2_1 supports ras info, + * read EEPROM table ras info + */ + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, + control->i2c_address + control->ras_info_offset, + buf, RAS_TABLE_V2_1_INFO_SIZE); + if (res < RAS_TABLE_V2_1_INFO_SIZE) { + DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res); + res = res >= 0 ? -EIO : res; + goto Out; + } + + __decode_table_ras_info_from_buf(rai, buf); + +Out: + kfree(buf); + return res == RAS_TABLE_V2_1_INFO_SIZE ? 
0 : res; +} + int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, bool *exceed_err_limit) { @@ -1102,8 +1315,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, return -EINVAL; control->ras_header_offset = RAS_HDR_START; - control->ras_record_offset = RAS_RECORD_START; - control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + control->ras_info_offset = RAS_TABLE_V2_1_INFO_START; mutex_init(&control->ras_tbl_mutex); /* Read the table header from EEPROM address */ @@ -1117,12 +1329,27 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, __decode_table_header_from_buf(hdr, buf); - control->ras_num_recs = RAS_NUM_RECS(hdr); + if (hdr->version == RAS_TABLE_VER_V2_1) { + control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); + control->ras_record_offset = RAS_RECORD_START_V2_1; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; + } else { + control->ras_num_recs = RAS_NUM_RECS(hdr); + control->ras_record_offset = RAS_RECORD_START; + control->ras_max_record_count = RAS_MAX_RECORD_COUNT; + } control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); if (hdr->header == RAS_TABLE_HDR_VAL) { DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", control->ras_num_recs); + + if (hdr->version == RAS_TABLE_VER_V2_1) { + res = __read_table_ras_info(control); + if (res) + return res; + } + res = __verify_ras_table_checksum(control); if (res) DRM_ERROR("RAS table incorrect checksum or error:%d\n", @@ -1136,6 +1363,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, ras->bad_page_cnt_threshold); } else if (hdr->header == RAS_TABLE_HDR_BAD && amdgpu_bad_page_threshold != 0) { + if (hdr->version == RAS_TABLE_VER_V2_1) { + res = __read_table_ras_info(control); + if (res) + return res; + } + res = __verify_ras_table_checksum(control); if (res) DRM_ERROR("RAS Table incorrect checksum or error:%d\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 54d9bfe0881d..6dfd667f3013 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -26,8 +26,16 @@ #include <linux/i2c.h> +#define RAS_TABLE_VER_V1 0x00010000 +#define RAS_TABLE_VER_V2_1 0x00021000 + struct amdgpu_device; +enum amdgpu_ras_gpu_health_status { + GPU_HEALTH_USABLE = 0, + GPU_RETIRED__ECC_REACH_THRESHOLD = 2, +}; + enum amdgpu_ras_eeprom_err_type { AMDGPU_RAS_EEPROM_ERR_NA, AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, @@ -43,9 +51,18 @@ struct amdgpu_ras_eeprom_table_header { uint32_t checksum; } __packed; +struct amdgpu_ras_eeprom_table_ras_info { + u8 rma_status; + u8 health_percent; + u16 ecc_page_threshold; + u32 padding[64 - 1]; +} __packed; + struct amdgpu_ras_eeprom_control { struct amdgpu_ras_eeprom_table_header tbl_hdr; + struct amdgpu_ras_eeprom_table_ras_info tbl_rai; + /* Base I2C EEPPROM 19-bit memory address, * where the table is located. For more information, * see top of amdgpu_eeprom.c. @@ -58,6 +75,7 @@ struct amdgpu_ras_eeprom_control { * right after the header. */ u32 ras_header_offset; + u32 ras_info_offset; u32 ras_record_offset; /* Number of records in the table. 
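The V2.1 layout above folds the new ras_info block into the table's byte-sum checksum and derives a GPU health percentage from the bad-page budget. Below is a minimal standalone sketch of that arithmetic (userspace C, not driver code); it mirrors __calc_ras_info_byte_sum() and the health_percent computation in the amdgpu_ras_eeprom_update_header() hunk, and the threshold and record count are made-up example values.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ras_info_v2_1 {			/* mirrors amdgpu_ras_eeprom_table_ras_info */
	uint8_t  rma_status;
	uint8_t  health_percent;
	uint16_t ecc_page_threshold;
	uint32_t padding[64 - 1];
} __attribute__((packed));

static uint8_t byte_sum(const void *p, size_t sz)
{
	const uint8_t *b = p;
	uint8_t csum = 0;

	while (sz--)
		csum += *b++;
	return csum;
}

int main(void)
{
	struct ras_info_v2_1 rai;
	uint32_t bad_page_cnt_threshold = 100;	/* example value */
	uint32_t ras_num_recs = 10;		/* example value */
	uint8_t csum;

	memset(&rai, 0, sizeof(rai));
	rai.rma_status = 0;			/* GPU_HEALTH_USABLE */
	rai.ecc_page_threshold = bad_page_cnt_threshold;
	/* same integer math as the update_header() hunk above */
	rai.health_percent = (bad_page_cnt_threshold - ras_num_recs) * 100 /
			     bad_page_cnt_threshold;

	/* the driver additionally sums the header bytes via __calc_hdr_byte_sum() */
	csum = byte_sum(&rai, sizeof(rai));
	csum = (uint8_t)-csum;			/* stored so that all table bytes sum to 0 */

	printf("info block: %zu bytes, health %u%%, checksum byte 0x%02x\n",
	       sizeof(rai), rai.health_percent, csum);
	return 0;
}

With these example values the sketch reports a 256-byte info block and a health figure of 90%; the negated checksum byte follows the same csum = -csum convention used for the header in the hunks above.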
@@ -124,7 +142,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *records, const u32 num); -uint32_t amdgpu_ras_eeprom_max_record_count(void); +uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control); void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 6437ead87e5f..eec41ad30406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -40,6 +40,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): ret = aldebaran_reset_init(adev); break; case IP_VERSION(11, 0, 7): @@ -61,6 +62,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): ret = aldebaran_reset_fini(adev); break; case IP_VERSION(11, 0, 7): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 49de3a3eebc7..80d6e132e409 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -50,6 +50,26 @@ */ /** + * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission. + * + * @type: ring type for which to return the limit. + */ +unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type) +{ + switch (type) { + case AMDGPU_RING_TYPE_GFX: + /* Need to keep at least 192 on GFX7+ for old radv. */ + return 192; + case AMDGPU_RING_TYPE_COMPUTE: + return 125; + case AMDGPU_RING_TYPE_VCN_JPEG: + return 16; + default: + return 49; + } +} + +/** * amdgpu_ring_alloc - allocate space on the ring buffer * * @ring: amdgpu_ring structure holding ring information @@ -58,7 +78,7 @@ * Allocate @ndw dwords in the ring buffer (all asics). * Returns 0 on success, error on failure. 
*/ -int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) +int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) { /* Align requested size with padding so unlock_commit can * pad safely */ @@ -182,6 +202,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, int sched_hw_submission = amdgpu_sched_hw_submission; u32 *num_sched; u32 hw_ip; + unsigned int max_ibs_dw; /* Set the hw submission limit higher for KIQ because * it's used for a number of gfx/compute tasks by both @@ -290,6 +311,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } + max_ibs_dw = ring->funcs->emit_frame_size + + amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size; + max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask; + + if (WARN_ON(max_ibs_dw > max_dw)) + max_dw = max_ibs_dw; + ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); ring->buf_mask = (ring->ring_size / 4) - 1; @@ -361,6 +389,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, (void **)&ring->ring); + } else { + kfree(ring->fence_drv.fences); } dma_fence_put(ring->vmid_wait); @@ -403,11 +433,18 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, struct dma_fence *fence) { + unsigned long flags; + ktime_t deadline = ktime_add_us(ktime_get(), 10000); if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; + spin_lock_irqsave(fence->lock, flags); + if (!dma_fence_is_signaled_locked(fence)) + dma_fence_set_error(fence, -ENODATA); + spin_unlock_irqrestore(fence->lock, flags); + atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) @@ -478,6 +515,70 @@ static const struct file_operations amdgpu_debugfs_ring_fops = { .llseek = default_llseek }; +static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_ring *ring = file_inode(f)->i_private; + volatile u32 *mqd; + int r; + uint32_t value, result; + + if (*pos & 3 || size & 3) + return -EINVAL; + + result = 0; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + if (r) { + amdgpu_bo_unreserve(ring->mqd_obj); + return r; + } + + while (size) { + if (*pos >= ring->mqd_size) + goto done; + + value = mqd[*pos/4]; + r = put_user(value, (uint32_t *)buf); + if (r) + goto done; + buf += 4; + result += 4; + size -= 4; + *pos += 4; + } + +done: + amdgpu_bo_kunmap(ring->mqd_obj); + mqd = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + return r; + + return result; +} + +static const struct file_operations amdgpu_debugfs_mqd_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_mqd_read, + .llseek = default_llseek +}; + +static int amdgpu_debugfs_ring_error(void *data, u64 val) +{ + struct amdgpu_ring *ring = data; + + amdgpu_fence_driver_set_error(ring, val); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL, + amdgpu_debugfs_ring_error, "%lld\n"); + #endif void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, @@ -489,10 +590,21 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, char name[32]; sprintf(name, "amdgpu_ring_%s", ring->name); - debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring, + 
debugfs_create_file_size(name, S_IFREG | 0444, root, ring, &amdgpu_debugfs_ring_fops, ring->ring_size + 12); + if (ring->mqd_obj) { + sprintf(name, "amdgpu_mqd_%s", ring->name); + debugfs_create_file_size(name, S_IFREG | 0444, root, ring, + &amdgpu_debugfs_mqd_fops, + ring->mqd_size); + } + + sprintf(name, "amdgpu_error_%s", ring->name); + debugfs_create_file(name, 0200, root, ring, + &amdgpu_debugfs_error_fops); + #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 2474cb71e476..028ff075db51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -37,8 +37,8 @@ struct amdgpu_job; struct amdgpu_vm; /* max number of rings */ -#define AMDGPU_MAX_RINGS 28 -#define AMDGPU_MAX_HWIP_RINGS 8 +#define AMDGPU_MAX_RINGS 124 +#define AMDGPU_MAX_HWIP_RINGS 64 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_SW_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 @@ -126,6 +126,7 @@ struct amdgpu_fence_driver { extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); +void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); @@ -212,6 +213,8 @@ struct amdgpu_ring_funcs { void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); + void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, + u64 gds_va, bool init_shadow, int vmid); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); @@ -253,12 +256,14 @@ struct amdgpu_ring { uint32_t buf_mask; u32 idx; u32 xcc_id; + u32 xcp_id; u32 me; u32 pipe; u32 queue; struct amdgpu_bo *mqd_obj; uint64_t mqd_gpu_addr; void *mqd_ptr; + unsigned mqd_size; uint64_t eop_gpu_addr; u32 doorbell_index; bool use_doorbell; @@ -312,6 +317,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) +#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) ((r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v))) #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) @@ -325,6 +331,7 @@ struct amdgpu_ring { #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) #define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) +unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_ring_ib_end(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 85fb730d9fc8..35e0ae9acadc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -31,12 +31,13 @@ * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode * * @adev: amdgpu_device pointer + * @xcc_id: xcc accelerated compute core id * * Set RLC enter into safe 
mode if RLC is enabled and haven't in safe mode. */ -void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id) { - if (adev->gfx.rlc.in_safe_mode) + if (adev->gfx.rlc.in_safe_mode[xcc_id]) return; /* if RLC is not enabled, do nothing */ @@ -46,8 +47,8 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_3D_CGCG)) { - adev->gfx.rlc.funcs->set_safe_mode(adev); - adev->gfx.rlc.in_safe_mode = true; + adev->gfx.rlc.funcs->set_safe_mode(adev, xcc_id); + adev->gfx.rlc.in_safe_mode[xcc_id] = true; } } @@ -55,12 +56,13 @@ void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode * * @adev: amdgpu_device pointer + * @xcc_id: xcc accelerated compute core id * * Set RLC exit safe mode if RLC is enabled and have entered into safe mode. */ -void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id) { - if (!(adev->gfx.rlc.in_safe_mode)) + if (!(adev->gfx.rlc.in_safe_mode[xcc_id])) return; /* if RLC is not enabled, do nothing */ @@ -70,8 +72,8 @@ void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_3D_CGCG)) { - adev->gfx.rlc.funcs->unset_safe_mode(adev); - adev->gfx.rlc.in_safe_mode = false; + adev->gfx.rlc.funcs->unset_safe_mode(adev, xcc_id); + adev->gfx.rlc.in_safe_mode[xcc_id] = false; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 23f060db9255..80b263646966 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -157,8 +157,8 @@ typedef struct _RLC_TABLE_OF_CONTENT { struct amdgpu_rlc_funcs { bool (*is_rlc_enabled)(struct amdgpu_device *adev); - void (*set_safe_mode)(struct amdgpu_device *adev); - void (*unset_safe_mode)(struct amdgpu_device *adev); + void (*set_safe_mode)(struct amdgpu_device *adev, int xcc_id); + void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id); int (*init)(struct amdgpu_device *adev); u32 (*get_csb_size)(struct amdgpu_device *adev); void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); @@ -201,7 +201,7 @@ struct amdgpu_rlc { u32 cp_table_size; /* safe mode for updating CG/PG state */ - bool in_safe_mode; + bool in_safe_mode[8]; const struct amdgpu_rlc_funcs *funcs; /* for firmware data */ @@ -260,8 +260,8 @@ struct amdgpu_rlc { struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl; }; -void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); -void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id); +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev, int xcc_id); int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 231ca06bc9c7..78ec3420ef85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -64,7 +64,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) } uint64_t amdgpu_sdma_get_csa_mc_addr(struct 
amdgpu_ring *ring, - unsigned vmid) + unsigned int vmid) { struct amdgpu_device *adev = ring->adev; uint64_t csa_mc_addr; @@ -252,6 +252,13 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, if (!duplicate && (instance != i)) continue; else { + /* Use a single copy per SDMA firmware type. PSP uses the same instance for all + * groups of SDMAs */ + if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) && + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + adev->sdma.num_inst_per_aid == i) { + break; + } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index fc8528812598..513ac22120c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -26,7 +26,7 @@ #include "amdgpu_ras.h" /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 8 +#define AMDGPU_MAX_SDMA_INSTANCES 16 enum amdgpu_sdma_irq { AMDGPU_SDMA_IRQ_INSTANCE0 = 0, @@ -37,9 +37,19 @@ enum amdgpu_sdma_irq { AMDGPU_SDMA_IRQ_INSTANCE5, AMDGPU_SDMA_IRQ_INSTANCE6, AMDGPU_SDMA_IRQ_INSTANCE7, + AMDGPU_SDMA_IRQ_INSTANCE8, + AMDGPU_SDMA_IRQ_INSTANCE9, + AMDGPU_SDMA_IRQ_INSTANCE10, + AMDGPU_SDMA_IRQ_INSTANCE11, + AMDGPU_SDMA_IRQ_INSTANCE12, + AMDGPU_SDMA_IRQ_INSTANCE13, + AMDGPU_SDMA_IRQ_INSTANCE14, + AMDGPU_SDMA_IRQ_INSTANCE15, AMDGPU_SDMA_IRQ_LAST }; +#define NUM_SDMA(x) hweight32(x) + struct amdgpu_sdma_instance { /* SDMA firmware */ const struct firmware *fw; @@ -49,6 +59,35 @@ struct amdgpu_sdma_instance { struct amdgpu_ring ring; struct amdgpu_ring page; bool burst_nop; + uint32_t aid_id; +}; + +enum amdgpu_sdma_ras_memory_id { + AMDGPU_SDMA_MBANK_DATA_BUF0 = 1, + AMDGPU_SDMA_MBANK_DATA_BUF1 = 2, + AMDGPU_SDMA_MBANK_DATA_BUF2 = 3, + AMDGPU_SDMA_MBANK_DATA_BUF3 = 4, + AMDGPU_SDMA_MBANK_DATA_BUF4 = 5, + AMDGPU_SDMA_MBANK_DATA_BUF5 = 6, + AMDGPU_SDMA_MBANK_DATA_BUF6 = 7, + AMDGPU_SDMA_MBANK_DATA_BUF7 = 8, + AMDGPU_SDMA_MBANK_DATA_BUF8 = 9, + AMDGPU_SDMA_MBANK_DATA_BUF9 = 10, + AMDGPU_SDMA_MBANK_DATA_BUF10 = 11, + AMDGPU_SDMA_MBANK_DATA_BUF11 = 12, + AMDGPU_SDMA_MBANK_DATA_BUF12 = 13, + AMDGPU_SDMA_MBANK_DATA_BUF13 = 14, + AMDGPU_SDMA_MBANK_DATA_BUF14 = 15, + AMDGPU_SDMA_MBANK_DATA_BUF15 = 16, + AMDGPU_SDMA_UCODE_BUF = 17, + AMDGPU_SDMA_RB_CMD_BUF = 18, + AMDGPU_SDMA_IB_CMD_BUF = 19, + AMDGPU_SDMA_UTCL1_RD_FIFO = 20, + AMDGPU_SDMA_UTCL1_RDBST_FIFO = 21, + AMDGPU_SDMA_UTCL1_WR_FIFO = 22, + AMDGPU_SDMA_DATA_LUT_FIFO = 23, + AMDGPU_SDMA_SPLIT_DAT_BUF = 24, + AMDGPU_SDMA_MEMORY_BLOCK_LAST, }; struct amdgpu_sdma_ras { @@ -66,6 +105,8 @@ struct amdgpu_sdma { struct amdgpu_irq_src srbm_write_irq; int num_instances; + uint32_t sdma_mask; + int num_inst_per_aid; uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index c7a823f3f2c5..89c38d864471 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -30,6 +30,7 @@ struct amdgpu_smuio_funcs { void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev); + enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2cd081cbf706..0534ab716809 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -38,7 +38,6 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/swap.h> -#include <linux/swiotlb.h> #include <linux/dma-buf.h> #include <linux/sizes.h> #include <linux/module.h> @@ -65,7 +64,7 @@ MODULE_IMPORT_NS(DMA_BUF); -#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 +#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128) static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_tt *ttm, @@ -184,11 +183,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct ttm_resource *mem, struct amdgpu_res_cursor *mm_cur, - unsigned window, struct amdgpu_ring *ring, + unsigned int window, struct amdgpu_ring *ring, bool tmz, uint64_t *size, uint64_t *addr) { struct amdgpu_device *adev = ring->adev; - unsigned offset, num_pages, num_dw, num_bytes; + unsigned int offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; struct amdgpu_job *job; void *cpu_addr; @@ -229,7 +228,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4 + num_bytes, AMDGPU_IB_POOL_DELAYED, &job); @@ -384,7 +383,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { @@ -631,6 +631,7 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; + int32_t pool_id; }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -800,6 +801,44 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, sg_free_table(ttm->sg); } +/* + * total_pages is constructed as MQD0+CtrlStack0 + MQD1+CtrlStack1 + ... + * MQDn+CtrlStackn where n is the number of XCCs per partition. + * pages_per_xcc is the size of one MQD+CtrlStack. The first page is MQD + * and uses memory type default, UC. The rest of pages_per_xcc are + * Ctrl stack and modify their memory type to NC. + */ +static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, + struct ttm_tt *ttm, uint64_t flags) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + uint64_t total_pages = ttm->num_pages; + int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); + uint64_t page_idx, pages_per_xcc; + int i; + uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + + pages_per_xcc = total_pages; + do_div(pages_per_xcc, num_xcc); + + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { + /* MQD page: use default flags */ + amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + 1, &gtt->ttm.dma_address[page_idx], flags); + /* + * Ctrl pages - modify the memory type to NC (ctrl_flags) from + * the second page of the BO onward.
+ */ + amdgpu_gart_bind(adev, + gtt->offset + ((page_idx + 1) << PAGE_SHIFT), + pages_per_xcc - 1, + &gtt->ttm.dma_address[page_idx + 1], + ctrl_flags); + } +} + static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) @@ -812,21 +851,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, flags |= AMDGPU_PTE_TMZ; if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { - uint64_t page_idx = 1; - - amdgpu_gart_bind(adev, gtt->offset, page_idx, - gtt->ttm.dma_address, flags); - - /* The memory type of the first page defaults to UC. Now - * modify the memory type to NC from the second page of - * the BO onward. - */ - flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; - flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); - - amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), - ttm->num_pages - page_idx, - &(gtt->ttm.dma_address[page_idx]), flags); + amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags); } else { amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); @@ -1029,15 +1054,20 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_ttm_tt *gtt; enum ttm_caching caching; gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); - if (gtt == NULL) { + if (!gtt) return NULL; - } + gtt->gobj = &bo->base; + if (adev->gmc.mem_partitions && abo->xcp_id >= 0) + gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); + else + gtt->pool_id = abo->xcp_id; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; @@ -1064,6 +1094,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); + struct ttm_pool *pool; pgoff_t i; int ret; @@ -1078,7 +1109,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) return 0; - ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + ret = ttm_pool_alloc(pool, ttm, ctx); if (ret) return ret; @@ -1099,6 +1134,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; + struct ttm_pool *pool; pgoff_t i; amdgpu_ttm_backend_unbind(bdev, ttm); @@ -1117,7 +1153,13 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, ttm->pages[i]->mapping = NULL; adev = amdgpu_ttm_adev(bdev); - return ttm_pool_free(&adev->mman.bdev.pool, ttm); + + if (adev->mman.ttm_pools && gtt->pool_id >= 0) + pool = &adev->mman.ttm_pools[gtt->pool_id]; + else + pool = &adev->mman.bdev.pool; + + return ttm_pool_free(pool, ttm); } /** @@ -1414,7 +1456,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, memcpy(adev->mman.sdma_access_ptr, buf, len); num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job); @@ -1623,14 +1665,15 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static void amdgpu_ttm_training_data_block_init(struct amdgpu_device
*adev) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev, + uint32_t reserve_size) { struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; memset(ctx, 0, sizeof(*ctx)); ctx->c2p_train_data_offset = - ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M); + ALIGN((adev->gmc.mc_vram_size - reserve_size - SZ_1M), SZ_1M); ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); ctx->train_data_size = @@ -1648,11 +1691,12 @@ static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) */ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { - int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; bool mem_train_support = false; + uint32_t reserve_size = 0; + int ret; - if (!amdgpu_sriov_vf(adev)) { + if (adev->bios && !amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) mem_train_support = true; else @@ -1666,14 +1710,18 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) * Otherwise, fallback to legacy approach to check and reserve tmr block for ip * discovery data and G6 memory training data respectively */ - adev->mman.discovery_tmr_size = - amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!adev->mman.discovery_tmr_size) - adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET; + if (adev->bios) + reserve_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + + if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + reserve_size = max(reserve_size, (uint32_t)280 << 20); + else if (!reserve_size) + reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { /* reserve vram for mem train according to TMR location */ - amdgpu_ttm_training_data_block_init(adev); + amdgpu_ttm_training_data_block_init(adev, reserve_size); ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, @@ -1687,20 +1735,58 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - return ret; + if (!adev->gmc.is_app_apu) { + ret = amdgpu_bo_create_kernel_at( + adev, adev->gmc.real_vram_size - reserve_size, + reserve_size, &adev->mman.fw_reserved_memory, NULL); + if (ret) { + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, + NULL, NULL); + return ret; + } + } else { + DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); } return 0; } +static int amdgpu_ttm_pools_init(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->gmc.num_mem_partitions) + return 0; + + adev->mman.ttm_pools = kcalloc(adev->gmc.num_mem_partitions, + sizeof(*adev->mman.ttm_pools), + GFP_KERNEL); + if (!adev->mman.ttm_pools) + return -ENOMEM; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) { + ttm_pool_init(&adev->mman.ttm_pools[i], adev->dev, + adev->gmc.mem_partitions[i].numa.node, + false, false); + } + return 0; +} + +static void amdgpu_ttm_pools_fini(struct amdgpu_device *adev) +{ + int i; + + if (!adev->gmc.is_app_apu || !adev->mman.ttm_pools) + return; + + for (i = 0; i < adev->gmc.num_mem_partitions; i++) + ttm_pool_fini(&adev->mman.ttm_pools[i]); 
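/*
 * Aside, not part of the patch: a minimal standalone sketch of how the
 * per-partition pools created in amdgpu_ttm_pools_init() above are picked
 * at populate()/unpopulate() time.  The *_stub types and pick_pool() are
 * invented stand-ins for illustration only; the driver indexes
 * adev->mman.ttm_pools[] by gtt->pool_id and falls back to the global
 * adev->mman.bdev.pool (an extra bounds check is added here to keep the
 * example safe on its own).
 */
#include <stdio.h>
#include <stdlib.h>

struct ttm_pool_stub { int numa_node; };	/* stands in for struct ttm_pool */

struct mman_stub {
	struct ttm_pool_stub *ttm_pools;	/* one pool per memory partition, or NULL */
	int num_mem_partitions;
	struct ttm_pool_stub bdev_pool;		/* stands in for adev->mman.bdev.pool */
};

static struct ttm_pool_stub *pick_pool(struct mman_stub *mman, int pool_id)
{
	if (mman->ttm_pools && pool_id >= 0 && pool_id < mman->num_mem_partitions)
		return &mman->ttm_pools[pool_id];
	return &mman->bdev_pool;	/* BOs not tied to a partition use the global pool */
}

int main(void)
{
	struct mman_stub mman = { .num_mem_partitions = 4, .bdev_pool = { .numa_node = -1 } };
	int i;

	mman.ttm_pools = calloc(mman.num_mem_partitions, sizeof(*mman.ttm_pools));
	if (!mman.ttm_pools)
		return 1;
	for (i = 0; i < mman.num_mem_partitions; i++)
		mman.ttm_pools[i].numa_node = i;	/* ttm_pool_init() gets each partition's NUMA node */

	printf("pool_id 2 -> node %d, pool_id -1 -> node %d\n",
	       pick_pool(&mman, 2)->numa_node, pick_pool(&mman, -1)->numa_node);
	free(mman.ttm_pools);
	return 0;
}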
+ + kfree(adev->mman.ttm_pools); + adev->mman.ttm_pools = NULL; +} + /* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. @@ -1727,6 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + r = amdgpu_ttm_pools_init(adev); + if (r) { + DRM_ERROR("failed to init ttm pools(%d).\n", r); + return r; + } adev->mman.initialized = true; /* Initialize VRAM pool with all of VRAM divided into pages */ @@ -1744,6 +1836,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); + else if (adev->gmc.is_app_apu) + DRM_DEBUG_DRIVER( + "No need to ioremap when real vram size is 0\n"); else #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, @@ -1755,9 +1850,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) *place on the VRAM, so reserve it early. */ r = amdgpu_ttm_fw_reserve_vram_init(adev); - if (r) { + if (r) return r; - } /* *The reserved vram for driver must be pinned to the specified @@ -1781,49 +1875,46 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* allocate memory as required for VGA * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS - * and driver. */ - r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; + * and driver. + */ + if (!adev->gmc.is_app_apu) { + r = amdgpu_bo_create_kernel_at(adev, 0, + adev->mman.stolen_vga_size, + &adev->mman.stolen_vga_memory, + NULL); + if (r) + return r; - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - - /* Compute GTT size, either based on 1/2 the size of RAM size - * or whatever the user passed on module init */ - if (amdgpu_gtt_size == -1) { - struct sysinfo si; - - si_meminfo(&si); - /* Certain GL unit tests for large textures can cause problems - * with the OOM killer since there is no way to link this memory - * to a process. This was originally mitigated (but not necessarily - * eliminated) by limiting the GTT size. The problem is this limit - * is often too low for many modern games so just make the limit 1/2 - * of system memory which aligns with TTM. The OOM accounting needs - * to be addressed, but we shouldn't prevent common 3D applications - * from being usable just to potentially mitigate that corner case. 
- */ - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - (u64)si.totalram * si.mem_unit / 2); + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, + adev->mman.stolen_extended_size, + &adev->mman.stolen_extended_memory, + NULL); + + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, + adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; } else { - gtt_size = (uint64_t)amdgpu_gtt_size << 20; + DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); } + DRM_INFO("amdgpu: %uM of VRAM memory ready\n", + (unsigned int)(adev->gmc.real_vram_size / (1024 * 1024))); + + /* Compute GTT size, either based on TTM limit + * or whatever the user passed on module init. + */ + if (amdgpu_gtt_size == -1) + gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; + else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; + /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); if (r) { @@ -1831,7 +1922,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", - (unsigned)(gtt_size / (1024 * 1024))); + (unsigned int)(gtt_size / (1024 * 1024))); /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); @@ -1858,7 +1949,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing oa heap.\n"); return r; } - if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, @@ -1874,18 +1964,24 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev) { int idx; + if (!adev->mman.initialized) return; + amdgpu_ttm_pools_fini(adev); + amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + if (!adev->gmc.is_app_apu) { + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + /* return the FW reserved memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.fw_reserved_memory, NULL, + NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); + } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -1927,7 +2023,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int r; if (!adev->mman.initialized || amdgpu_in_reset(adev) || - adev->mman.buffer_funcs_enabled == enable) + adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) return; if (enable) { @@ -1936,7 +2032,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) ring = adev->mman.buffer_funcs_ring; sched = &ring->sched; - r = drm_sched_entity_init(&adev->mman.entity, + r = drm_sched_entity_init(&adev->mman.high_pr, DRM_SCHED_PRIORITY_KERNEL, &sched, 1, NULL); if (r) { @@ -1944,8 +2040,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) r); return; } + + r = drm_sched_entity_init(&adev->mman.low_pr, + DRM_SCHED_PRIORITY_NORMAL, 
&sched, + 1, NULL); + if (r) { + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", + r); + goto error_free_entity; + } } else { - drm_sched_entity_destroy(&adev->mman.entity); + drm_sched_entity_destroy(&adev->mman.high_pr); + drm_sched_entity_destroy(&adev->mman.low_pr); dma_fence_put(man->move); man->move = NULL; } @@ -1957,6 +2063,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) size = adev->gmc.visible_vram_size; man->size = size; adev->mman.buffer_funcs_enabled = enable; + + return; + +error_free_entity: + drm_sched_entity_destroy(&adev->mman.high_pr); } static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, @@ -1964,14 +2075,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, unsigned int num_dw, struct dma_resv *resv, bool vm_needs_flush, - struct amdgpu_job **job) + struct amdgpu_job **job, + bool delayed) { enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_DELAYED; int r; - - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, + struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr : + &adev->mman.high_pr; + r = amdgpu_job_alloc_with_ib(adev, entity, AMDGPU_FENCE_OWNER_UNDEFINED, num_dw * 4, pool, job); if (r) @@ -1997,10 +2110,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, bool vm_needs_flush, bool tmz) { struct amdgpu_device *adev = ring->adev; - unsigned num_loops, num_dw; + unsigned int num_loops, num_dw; struct amdgpu_job *job; uint32_t max_bytes; - unsigned i; + unsigned int i; int r; if (!direct_submit && !ring->sched.ready) { @@ -2012,7 +2125,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, - resv, vm_needs_flush, &job); + resv, vm_needs_flush, &job, false); if (r) return r; @@ -2048,7 +2161,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, uint64_t dst_addr, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, - bool vm_needs_flush) + bool vm_needs_flush, bool delayed) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2061,7 +2174,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, - &job); + &job, delayed); if (r) return r; @@ -2084,7 +2197,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, - struct dma_fence **f) + struct dma_fence **f, + bool delayed) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -2113,7 +2227,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, goto error; r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, - &next, true); + &next, true, delayed); if (r) goto error; @@ -2164,7 +2278,7 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct amdgpu_device *adev = m->private; return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e2cd5894afc9..6d0d66e40db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -49,6 +49,7 @@ struct amdgpu_gtt_mgr { struct amdgpu_mman { struct ttm_device bdev; + struct ttm_pool *ttm_pools; bool initialized; void __iomem *aper_base_kaddr; @@ -58,8 +59,10 @@ struct amdgpu_mman { bool buffer_funcs_enabled; struct mutex gtt_window_lock; - /* Scheduler entity for buffer moves */ - struct drm_sched_entity entity; + /* High priority scheduler entity for buffer moves */ + struct drm_sched_entity high_pr; + /* Low priority scheduler entity for VRAM clearing */ + struct drm_sched_entity low_pr; struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; @@ -78,7 +81,8 @@ struct amdgpu_mman { /* discovery */ uint8_t *discovery_bin; uint32_t discovery_tmr_size; - struct amdgpu_bo *discovery_memory; + /* fw reserved memory */ + struct amdgpu_bo *fw_reserved_memory; /* firmware VRAM reservation */ u64 fw_vram_usage_start_offset; @@ -150,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, - struct dma_fence **fence); + struct dma_fence **fence, + bool delayed); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index f76b1cb8baf8..16807ff96dc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -748,7 +748,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct imu_firmware_header_v1_0 *imu_hdr = NULL; u8 *ucode_addr; - if (NULL == ucode->fw) + if (!ucode->fw) return 0; ucode->mc_addr = mc_addr; @@ -972,7 +972,7 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, uint8_t *src_addr = NULL; uint8_t *dst_addr = NULL; - if (NULL == ucode->fw) + if (!ucode->fw) return 0; comm_hdr = (const struct common_firmware_header *)ucode->fw->data; @@ -1043,6 +1043,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) if (i == AMDGPU_UCODE_ID_CP_MEC1 && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { const struct gfx_firmware_header_v1_0 *cp_hdr; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset, adev->firmware.fw_buf_ptr + fw_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 1edf8e6aeb16..db0d94ca4ffc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -169,27 +169,31 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) { int ret = AMDGPU_RAS_SUCCESS; - if (!amdgpu_sriov_vf(adev)) { - if (!adev->gmc.xgmi.connected_to_cpu) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); - - ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); - - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - } - } else if (reset) { + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) { + if (reset) { /* MCA poison handler is only responsible for GPU reset, * let MCA notifier 
do page retirement. */ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); amdgpu_ras_reset_gpu(adev); } + return ret; + } + + if (!amdgpu_sriov_vf(adev)) { + struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) adev->virt.ops->ras_poison_handler(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 86133f77a9a4..43321f57f557 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -59,6 +59,8 @@ struct amdgpu_umc_ras { void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); + /* support different eeprom table version for different asic */ + void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); }; struct amdgpu_umc_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 919d9d401750..107f9bb0e24f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -35,17 +35,51 @@ struct amdgpu_debugfs_regs2_iocdata { } srbm; }; +struct amdgpu_debugfs_regs2_iocdata_v2 { + __u32 use_srbm, use_grbm, pg_lock; + struct { + __u32 se, sh, instance; + } grbm; + struct { + __u32 me, pipe, queue, vmid; + } srbm; + u32 xcc_id; +}; + +struct amdgpu_debugfs_gprwave_iocdata { + u32 gpr_or_wave, se, sh, cu, wave, simd, xcc_id; + struct { + u32 thread, vpgr_or_sgpr; + } gpr; +}; + /* * MMIO debugfs state data (per file* handle) */ struct amdgpu_debugfs_regs2_data { struct amdgpu_device *adev; struct mutex lock; - struct amdgpu_debugfs_regs2_iocdata id; + struct amdgpu_debugfs_regs2_iocdata_v2 id; +}; + +struct amdgpu_debugfs_gprwave_data { + struct amdgpu_device *adev; + struct mutex lock; + struct amdgpu_debugfs_gprwave_iocdata id; }; enum AMDGPU_DEBUGFS_REGS2_CMDS { AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, +}; + +enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, }; +//reg2 interface #define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE, struct amdgpu_debugfs_regs2_iocdata) +#define AMDGPU_DEBUGFS_REGS2_IOC_SET_STATE_V2 _IOWR(0x20, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, struct amdgpu_debugfs_regs2_iocdata_v2) + +//gprwave interface +#define AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE _IOWR(0x20, AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE, struct amdgpu_debugfs_gprwave_iocdata) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6887109abb13..b7441654e6fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -96,16 +96,16 @@ */ struct amdgpu_uvd_cs_ctx { struct amdgpu_cs_parser *parser; - unsigned reg, count; - unsigned data0, data1; - unsigned idx; + unsigned int reg, count; + unsigned int data0, data1; + unsigned int idx; struct amdgpu_ib *ib; /* does the IB has a msg command */ bool has_msg_cmd; /* minimum buffer sizes */ - unsigned *buf_sizes; + unsigned int *buf_sizes; }; #ifdef CONFIG_DRM_AMDGPU_SI @@ -186,7 +186,7 @@ int amdgpu_uvd_sw_init(struct 
amdgpu_device *adev) unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; - unsigned family_id; + unsigned int family_id; int i, j, r; INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); @@ -275,7 +275,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) family_id = le32_to_cpu(hdr->ucode_version) & 0xff; if (adev->asic_type < CHIP_VEGA20) { - unsigned version_major, version_minor; + unsigned int version_major, version_minor; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; @@ -420,7 +420,7 @@ int amdgpu_uvd_entity_init(struct amdgpu_device *adev) int amdgpu_uvd_suspend(struct amdgpu_device *adev) { - unsigned size; + unsigned int size; void *ptr; int i, j, idx; bool in_ras_intr = amdgpu_ras_intr_triggered(); @@ -469,7 +469,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) int amdgpu_uvd_resume(struct amdgpu_device *adev) { - unsigned size; + unsigned int size; void *ptr; int i, idx; @@ -491,7 +491,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) adev->uvd.inst[i].saved_bo = NULL; } else { const struct common_firmware_header *hdr; - unsigned offset; + unsigned int offset; hdr = (const struct common_firmware_header *)adev->uvd.fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -542,6 +542,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) { int i; + for (i = 0; i < abo->placement.num_placement; ++i) { abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; @@ -579,7 +580,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); if (r) { - DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); return r; } @@ -589,6 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) if (cmd == 0x0 || cmd == 0x3) { /* yes, force it into VRAM */ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; + amdgpu_bo_placement_from_domain(bo, domain); } amdgpu_uvd_force_into_uvd_segment(bo); @@ -609,21 +611,21 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) * Peek into the decode message and calculate the necessary buffer sizes. 
*/ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, - unsigned buf_sizes[]) + unsigned int buf_sizes[]) { - unsigned stream_type = msg[4]; - unsigned width = msg[6]; - unsigned height = msg[7]; - unsigned dpb_size = msg[9]; - unsigned pitch = msg[28]; - unsigned level = msg[57]; + unsigned int stream_type = msg[4]; + unsigned int width = msg[6]; + unsigned int height = msg[7]; + unsigned int dpb_size = msg[9]; + unsigned int pitch = msg[28]; + unsigned int level = msg[57]; - unsigned width_in_mb = width / 16; - unsigned height_in_mb = ALIGN(height / 16, 2); - unsigned fs_in_mb = width_in_mb * height_in_mb; + unsigned int width_in_mb = width / 16; + unsigned int height_in_mb = ALIGN(height / 16, 2); + unsigned int fs_in_mb = width_in_mb * height_in_mb; - unsigned image_size, tmp, min_dpb_size, num_dpb_buffer; - unsigned min_ctx_size = ~0; + unsigned int image_size, tmp, min_dpb_size, num_dpb_buffer; + unsigned int min_ctx_size = ~0; image_size = width * height; image_size += image_size / 2; @@ -631,7 +633,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, switch (stream_type) { case 0: /* H264 */ - switch(level) { + switch (level) { case 30: num_dpb_buffer = 8100 / fs_in_mb; break; @@ -709,7 +711,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, break; case 7: /* H264 Perf */ - switch(level) { + switch (level) { case 30: num_dpb_buffer = 8100 / fs_in_mb; break; @@ -742,7 +744,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, /* reference picture buffer */ min_dpb_size = image_size * num_dpb_buffer; - if (!adev->uvd.use_ctx_buf){ + if (!adev->uvd.use_ctx_buf) { /* macroblock context buffer */ min_dpb_size += width_in_mb * height_in_mb * num_dpb_buffer * 192; @@ -805,7 +807,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, * Make sure that we don't open up to many sessions. 
*/ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, - struct amdgpu_bo *bo, unsigned offset) + struct amdgpu_bo *bo, unsigned int offset) { struct amdgpu_device *adev = ctx->parser->adev; int32_t *msg, msg_type, handle; @@ -911,7 +913,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) r = amdgpu_cs_find_mapping(ctx->parser, addr, &bo, &mapping); if (r) { - DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr); + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); return r; } @@ -930,7 +932,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) if (cmd < 0x4) { if ((end - start) < ctx->buf_sizes[cmd]) { DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, - (unsigned)(end - start), + (unsigned int)(end - start), ctx->buf_sizes[cmd]); return -EINVAL; } @@ -938,7 +940,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) } else if (cmd == 0x206) { if ((end - start) < ctx->buf_sizes[4]) { DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, - (unsigned)(end - start), + (unsigned int)(end - start), ctx->buf_sizes[4]); return -EINVAL; } @@ -949,14 +951,14 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) if (!ctx->parser->adev->uvd.address_64_bit) { if ((start >> 28) != ((end - 1) >> 28)) { - DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n", + DRM_ERROR("reloc %llx-%llx crossing 256MB boundary!\n", start, end); return -EINVAL; } if ((cmd == 0 || cmd == 0x3) && (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { - DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", + DRM_ERROR("msg/fb buffer %llx-%llx out of 256MB segment!\n", start, end); return -EINVAL; } @@ -990,7 +992,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, ctx->idx++; for (i = 0; i <= ctx->count; ++i) { - unsigned reg = ctx->reg + i; + unsigned int reg = ctx->reg + i; if (ctx->idx >= ctx->ib->length_dw) { DRM_ERROR("Register command after end of CS!\n"); @@ -1036,7 +1038,8 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) { uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx); - unsigned type = CP_PACKET_GET_TYPE(cmd); + unsigned int type = CP_PACKET_GET_TYPE(cmd); + switch (type) { case PACKET_TYPE0: ctx->reg = CP_PACKET0_GET_REG(cmd); @@ -1070,7 +1073,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, struct amdgpu_ib *ib) { struct amdgpu_uvd_cs_ctx ctx = {}; - unsigned buf_sizes[] = { + unsigned int buf_sizes[] = { [0x00000000] = 2048, [0x00000001] = 0xFFFFFFFF, [0x00000002] = 0xFFFFFFFF, @@ -1185,8 +1188,9 @@ err_free: } /* multiple fence commands without any stream commands in between can - crash the vcpu so just try to emmit a dummy create/destroy msg to - avoid this */ + * crash the vcpu so just try to emmit a dummy create/destroy msg to + * avoid this + */ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence **fence) { @@ -1252,15 +1256,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, uvd.idle_work.work); - unsigned fences = 0, i, j; + unsigned int fences = 0, i, j; for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { if (adev->uvd.harvest_config & (1 << i)) continue; fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); - for (j = 0; j < adev->uvd.num_enc_rings; ++j) { + for (j = 0; j < adev->uvd.num_enc_rings; ++j) fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); - } } if (fences 
== 0) { @@ -1356,7 +1359,7 @@ error: */ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev) { - unsigned i; + unsigned int i; uint32_t used_handles = 0; for (i = 0; i < adev->uvd.max_handles; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index e2b7324a70cb..1904edf68407 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -99,7 +99,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) { const char *fw_name; const struct common_firmware_header *hdr; - unsigned ucode_version, version_major, version_minor, binary_id; + unsigned int ucode_version, version_major, version_minor, binary_id; int i, r; switch (adev->asic_type) { @@ -207,7 +207,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) */ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) { - unsigned i; + unsigned int i; if (adev->vce.vcpu_bo == NULL) return 0; @@ -286,7 +286,7 @@ int amdgpu_vce_resume(struct amdgpu_device *adev) { void *cpu_addr; const struct common_firmware_header *hdr; - unsigned offset; + unsigned int offset; int r, idx; if (adev->vce.vcpu_bo == NULL) @@ -332,7 +332,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vce.idle_work.work); - unsigned i, count = 0; + unsigned int i, count = 0; for (i = 0; i < adev->vce.num_rings; i++) count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); @@ -409,6 +409,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) { struct amdgpu_ring *ring = &adev->vce.ring[0]; int i, r; + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { uint32_t handle = atomic_read(&adev->vce.handles[i]); @@ -436,7 +437,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence **fence) { - const unsigned ib_size_dw = 1024; + const unsigned int ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct amdgpu_ib ib_msg; @@ -528,7 +529,7 @@ err: static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence) { - const unsigned ib_size_dw = 1024; + const unsigned int ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; @@ -596,12 +597,12 @@ err: */ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, int lo, int hi, - unsigned size, int32_t index) + unsigned int size, int32_t index) { int64_t offset = ((uint64_t)size) * ((int64_t)index); struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *mapping; - unsigned i, fpfn, lpfn; + unsigned int i, fpfn, lpfn; struct amdgpu_bo *bo; uint64_t addr; int r; @@ -619,7 +620,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); if (r) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", addr, lo, hi, size, index); return r; } @@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, * Patch relocation inside command stream with real buffer address */ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, - int lo, int hi, unsigned size, uint32_t index) + int lo, int hi, unsigned int size, uint32_t index) { struct 
amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; @@ -662,14 +663,14 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); if (r) { - DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + DRM_ERROR("Can't find BO for addr 0x%010llx %d %d %d %d\n", addr, lo, hi, size, index); return r; } if ((addr + (uint64_t)size) > (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n", + DRM_ERROR("BO too small for addr 0x%010llx %d %d\n", addr, lo, hi); return -EINVAL; } @@ -692,12 +693,12 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, * @allocated: allocated a new handle? * * Validates the handle and return the found session index or -EINVAL - * we we don't have another free session index. + * we don't have another free session index. */ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, uint32_t handle, uint32_t *allocated) { - unsigned i; + unsigned int i; /* validate the handle */ for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { @@ -735,14 +736,14 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - unsigned fb_idx = 0, bs_idx = 0; + unsigned int fb_idx = 0, bs_idx = 0; int session_idx = -1; uint32_t destroyed = 0; uint32_t created = 0; uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - unsigned idx; + unsigned int idx; int i, r = 0; job->vm = NULL; @@ -1084,7 +1085,7 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, * */ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) + unsigned int flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1106,7 +1107,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t rptr; - unsigned i; + unsigned int i; int r, timeout = adev->usec_timeout; /* skip ring test for sriov*/ @@ -1171,7 +1172,7 @@ error: enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring) { - switch(ring) { + switch (ring) { case 0: return AMDGPU_RING_PRIO_0; case 1: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2d94f1b63bd6..acbef1a24b9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -56,6 +56,7 @@ #define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" #define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin" #define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin" +#define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); @@ -77,6 +78,7 @@ MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP); MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_0); MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_3); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -167,7 +169,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)){ + if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) { fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); } else { @@ -233,11 +235,11 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device 
*adev) if (adev->vcn.harvest_config & (1 << j)) continue; - if (adev->vcn.indirect_sram) { - amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo, - &adev->vcn.inst[j].dpg_sram_gpu_addr, - (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); - } + amdgpu_bo_free_kernel( + &adev->vcn.inst[j].dpg_sram_bo, + &adev->vcn.inst[j].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + kvfree(adev->vcn.inst[j].saved_bo); amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, @@ -274,20 +276,19 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t bool ret = false; int vcn_config = adev->vcn.vcn_config[vcn_instance]; - if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) { + if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) ret = true; - } else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) { + else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) ret = true; - } else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) { + else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) ret = true; - } return ret; } int amdgpu_vcn_suspend(struct amdgpu_device *adev) { - unsigned size; + unsigned int size; void *ptr; int i, idx; @@ -316,7 +317,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) int amdgpu_vcn_resume(struct amdgpu_device *adev) { - unsigned size; + unsigned int size; void *ptr; int i, idx; @@ -338,7 +339,7 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) adev->vcn.inst[i].saved_bo = NULL; } else { const struct common_firmware_header *hdr; - unsigned offset; + unsigned int offset; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -369,9 +370,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (adev->vcn.harvest_config & (1 << j)) continue; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.num_enc_rings; ++i) fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - } if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { struct dpg_pause_state new_state; @@ -458,7 +458,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t tmp = 0; - unsigned i; + unsigned int i; int r; /* VCN in SRIOV does not support direct register read/write */ @@ -795,7 +795,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; uint32_t rptr; - unsigned i; + unsigned int i; int r; if (amdgpu_sriov_vf(adev)) @@ -993,11 +993,14 @@ error: int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) { + struct amdgpu_device *adev = ring->adev; long r; - r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); - if (r) - goto error; + if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) { + r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); + if (r) + goto error; + } r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); @@ -1007,7 +1010,7 @@ error: enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) { - switch(ring) { + switch (ring) { case 0: return AMDGPU_RING_PRIO_0; case 1: @@ -1026,6 +1029,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; for (i = 0; i < 
adev->vcn.num_vcn_inst; i++) { @@ -1041,6 +1045,9 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) adev->firmware.ucode[idx].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3)) + break; } dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } @@ -1051,7 +1058,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) */ #if defined(CONFIG_DEBUG_FS) static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) + size_t size, loff_t *pos) { struct amdgpu_vcn_inst *vcn; void *log_buf; @@ -1097,7 +1104,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, if (read_pos == AMDGPU_VCNFW_LOG_SIZE) read_pos = plog->header_size; if (read_num[i] == copy_to_user((buf + read_bytes), - (log_buf + read_pos), read_num[i])) + (log_buf + read_pos), read_num[i])) return -EFAULT; read_bytes += read_num[i]; @@ -1118,7 +1125,7 @@ static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = { #endif void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, - struct amdgpu_vcn_inst *vcn) + struct amdgpu_vcn_inst *vcn) { #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; @@ -1126,7 +1133,7 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, char name[32]; sprintf(name, "amdgpu_vcn_%d_fwlog", i); - debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn, + debugfs_create_file_size(name, S_IFREG | 0444, root, vcn, &amdgpu_debugfs_vcnfwlog_fops, AMDGPU_VCNFW_LOG_SIZE); #endif @@ -1140,7 +1147,7 @@ void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr - + vcn->fw_shared.log_offset; + + vcn->fw_shared.log_offset; *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); fw_log->is_enabled = 1; fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f1397ef66fd7..92d5534df5f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -32,7 +32,7 @@ #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 -#define AMDGPU_MAX_VCN_INSTANCES 2 +#define AMDGPU_MAX_VCN_INSTANCES 4 #define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) @@ -141,18 +141,23 @@ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ - *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \ + mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + VCN, GET_INST(VCN, inst_idx), \ + mmUVD_DPG_LMA_CTL, \ + (0x1 << 
UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ } while (0) #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) @@ -243,6 +248,7 @@ struct amdgpu_vcn_inst { uint32_t *dpg_sram_curr_addr; atomic_t dpg_enc_submission_cnt; struct amdgpu_vcn_fw_shared fw_shared; + uint8_t aid_id; }; struct amdgpu_vcn_ras { @@ -272,6 +278,9 @@ struct amdgpu_vcn { struct ras_common_if *ras_if; struct amdgpu_vcn_ras *ras; + + uint16_t inst_mask; + uint8_t num_inst_per_aid; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f2e2cbaa7fde..25b4d7f0bd35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -56,7 +56,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ if (adev->asic_type != CHIP_ALDEBARAN && - adev->asic_type != CHIP_ARCTURUS) { + adev->asic_type != CHIP_ARCTURUS && + ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) { if (adev->mode_info.num_crtc == 0) adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; @@ -65,16 +66,19 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->cg_flags = 0; adev->pg_flags = 0; - /* enable mcbp for sriov asic_type before soc21 */ - amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0; + /* enable mcbp for sriov */ + amdgpu_mcbp = 1; + /* Reduce kcq number to 2 to reduce latency */ + if (amdgpu_num_kcq == -1) + amdgpu_num_kcq = 2; } void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; @@ -557,7 +561,6 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev) POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version); - POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU, adev->gfx.imu_fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version); POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD, adev->psp.asd_context.bin_desc.fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3c0310576b3b..143d11afe0e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -267,6 +267,32 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) } /** + * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine + * @vm: the VM which state machine to reset + * + * Move all vm_bo object in the VM into a state where they will be updated + * again during validation. 
+ */ +static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) +{ + struct amdgpu_vm_bo_base *vm_bo, *tmp; + + spin_lock(&vm->status_lock); + list_splice_init(&vm->done, &vm->invalidated); + list_for_each_entry(vm_bo, &vm->invalidated, vm_status) + vm_bo->moved = true; + list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { + struct amdgpu_bo *bo = vm_bo->bo; + + if (!bo || bo->tbo.type != ttm_bo_type_kernel) + list_move(&vm_bo->vm_status, &vm_bo->vm->moved); + else if (bo->parent) + list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); + } + spin_unlock(&vm->status_lock); +} + +/** * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm * * @base: base structure for tracking BO usage in a VM @@ -351,6 +377,58 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, spin_unlock(&adev->mman.bdev.lru_lock); } +/* Create scheduler entities for page table updates */ +static int amdgpu_vm_init_entities(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + int r; + + r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); + if (r) + goto error; + + return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, + adev->vm_manager.vm_pte_scheds, + adev->vm_manager.vm_pte_num_scheds, NULL); + +error: + drm_sched_entity_destroy(&vm->immediate); + return r; +} + +/* Destroy the entities for page table updates again */ +static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm) +{ + drm_sched_entity_destroy(&vm->immediate); + drm_sched_entity_destroy(&vm->delayed); +} + +/** + * amdgpu_vm_generation - return the page table re-generation counter + * @adev: the amdgpu_device + * @vm: optional VM to check, might be NULL + * + * Returns a page table re-generation token to allow checking if submissions + * are still valid to use this VM. The VM parameter might be NULL in which case + * just the VRAM lost counter will be used. + */ +uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32; + + if (!vm) + return result; + + result += vm->generation; + /* Add one if the page tables will be re-generated on next CS */ + if (drm_sched_entity_error(&vm->delayed)) + ++result; + + return result; +} + /** * amdgpu_vm_validate_pt_bos - validate the page table BOs * @@ -373,6 +451,15 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo; int r; + if (drm_sched_entity_error(&vm->delayed)) { + ++vm->generation; + amdgpu_vm_bo_reset_state_machine(vm); + amdgpu_vm_fini_entities(vm); + r = amdgpu_vm_init_entities(adev, vm); + if (r) + return r; + } + spin_lock(&vm->status_lock); while (!list_empty(&vm->evicted)) { bo_base = list_first_entry(&vm->evicted, @@ -920,42 +1007,51 @@ error_unlock: return r; } +static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, + struct amdgpu_mem_stats *stats) +{ + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_bo *bo = bo_va->base.bo; + + if (!bo) + return; + + /* + * For now ignore BOs which are currently locked and potentially + * changing their location. 
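/*
 * Illustrative sketch, not part of the patch: how a submission path could use
 * the new amdgpu_vm_generation() token introduced above.  The helper name
 * job_pt_still_valid() is hypothetical; the point is that any change in the
 * token (VRAM lost, or the delayed entity hitting an error and forcing a
 * page-table rebuild) invalidates work prepared against the old page tables.
 */
static bool job_pt_still_valid(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t saved_gen)
{
	return saved_gen == amdgpu_vm_generation(adev, vm);
}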
+ */ + if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv && + !dma_resv_trylock(bo->tbo.base.resv)) + return; + + amdgpu_bo_get_memory(bo, stats); + if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) + dma_resv_unlock(bo->tbo.base.resv); +} + void amdgpu_vm_get_memory(struct amdgpu_vm *vm, struct amdgpu_mem_stats *stats) { struct amdgpu_bo_va *bo_va, *tmp; spin_lock(&vm->status_lock); - list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } - list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } - list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } - list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } - list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) { - if (!bo_va->base.bo) - continue; - amdgpu_bo_get_memory(bo_va->base.bo, stats); - } + list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); + + list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); + + list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); + + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); + + list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); + + list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) + amdgpu_vm_bo_get_memory(bo_va, stats); spin_unlock(&vm->status_lock); } @@ -1358,6 +1454,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); bo_va->ref_count = 1; + bo_va->last_pt_update = dma_fence_get_stub(); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); @@ -1433,14 +1530,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t eaddr; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; @@ -1499,14 +1596,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, int r; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) 
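/*
 * Illustrative sketch, not part of the patch: the reworked checks in
 * amdgpu_vm_bo_map()/amdgpu_vm_bo_replace_map() above fold the old
 * "size == 0" test into an overflow test - with size == 0, or with a size
 * that wraps the 64-bit address space, base + size <= base holds and the
 * mapping is rejected before eaddr is even computed.
 */
static bool va_range_invalid(uint64_t base, uint64_t size)
{
	/* e.g. base = 0xfffffffffffff000, size = 0x2000 wraps to 0x1000 */
	return base + size <= base;
}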
return -EINVAL; @@ -2038,19 +2135,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); - /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, - adev->vm_manager.vm_pte_scheds, - adev->vm_manager.vm_pte_num_scheds, NULL); + r = amdgpu_vm_init_entities(adev, vm); if (r) return r; - r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, - adev->vm_manager.vm_pte_scheds, - adev->vm_manager.vm_pte_num_scheds, NULL); - if (r) - goto error_free_immediate; - vm->pte_support_ats = false; vm->is_compute_context = false; @@ -2067,9 +2155,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_cpu_funcs; else vm->update_funcs = &amdgpu_vm_sdma_funcs; - vm->last_update = NULL; + + vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); + vm->generation = 0; mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2110,10 +2200,7 @@ error_free_root: error_free_delayed: dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); - drm_sched_entity_destroy(&vm->delayed); - -error_free_immediate: - drm_sched_entity_destroy(&vm->immediate); + amdgpu_vm_fini_entities(vm); return r; } @@ -2192,7 +2279,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto unreserve_bo; dma_fence_put(vm->last_update); - vm->last_update = NULL; + vm->last_update = dma_fence_get_stub(); vm->is_compute_context = true; /* Free the shadow bo for compute VM */ @@ -2266,8 +2353,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); - drm_sched_entity_destroy(&vm->immediate); - drm_sched_entity_destroy(&vm->delayed); + amdgpu_vm_fini_entities(vm); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2282,8 +2368,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } dma_fence_put(vm->last_update); - for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) - amdgpu_vmid_free_reserved(adev, vm, i); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { + if (vm->reserved_vmid[i]) { + amdgpu_vmid_free_reserved(adev, i); + vm->reserved_vmid[i] = false; + } + } + } /** @@ -2366,18 +2458,25 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) union drm_amdgpu_vm *args = data; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_fpriv *fpriv = filp->driver_priv; - int r; + + /* No valid flags defined yet */ + if (args->in.flags) + return -EINVAL; switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB_0); - if (r) - return r; + if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { + amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0)); + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true; + } + break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); + if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) { + amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0)); + fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false; + } break; default: return -EINVAL; @@ -2432,6 +2531,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * amdgpu_vm_handle_fault - graceful handling of VM faults. 
* @adev: amdgpu device pointer * @pasid: PASID of the VM + * @vmid: VMID, only used for GFX 9.4.3. + * @node_id: Node_id received in IH cookie. Only applicable for + * GFX 9.4.3. * @addr: Address of the fault * @write_fault: true is write fault, false is read fault * @@ -2439,7 +2541,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * shouldn't be reported any more. */ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr, bool write_fault) + u32 vmid, u32 node_id, uint64_t addr, + bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; @@ -2463,8 +2566,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && - !svm_range_restore_pages(adev, pasid, addr, write_fault)) { + if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, + node_id, addr, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6f085f0b4ef3..9c85d494f2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -111,11 +111,14 @@ struct amdgpu_mem_stats; /* Reserve 4MB VRAM for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) -/* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 3 -#define AMDGPU_GFXHUB_0 0 -#define AMDGPU_MMHUB_0 1 -#define AMDGPU_MMHUB_1 2 +/* + * max number of VMHUB + * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 + */ +#define AMDGPU_MAX_VMHUBS 13 +#define AMDGPU_GFXHUB(x) (x) +#define AMDGPU_MMHUB0(x) (8 + x) +#define AMDGPU_MMHUB1(x) (8 + 4 + x) /* Reserve 2MB at top/bottom of address space for kernel use */ #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) @@ -292,6 +295,9 @@ struct amdgpu_vm { atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; + /* How many times we had to re-generate the page tables */ + uint64_t generation; + /* Last unlocked submission to the scheduler entities */ struct dma_fence *last_unlocked; @@ -326,6 +332,9 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* Flag to indicate if VM is used for compute */ bool is_compute_context; + + /* Memory partition number, -1 means any partition */ + int8_t mem_id; }; struct amdgpu_vm_manager { @@ -391,6 +400,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); bool amdgpu_vm_ready(struct amdgpu_vm *vm); +uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); @@ -452,7 +462,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr, bool write_fault); + u32 vmid, u32 node_id, uint64_t addr, + bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 051c7194ab4f..dea1a64be44d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -502,6 +502,7 @@ exit: int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo) { + struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, 
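/*
 * Illustrative sketch, not part of the patch: the new VMHUB index layout in
 * amdgpu_vm.h packs up to 8 GFX hubs, 4 MMHUB0 instances and 1 MMHUB1
 * instance into a single 13-entry space.  The old GFXHUB_0 index keeps the
 * value 0, while the MMHUB indices move behind the GFX hub range.
 */
static void vmhub_layout_example(void)
{
	BUILD_BUG_ON(AMDGPU_GFXHUB(0) != 0);	/* was AMDGPU_GFXHUB_0 == 0 */
	BUILD_BUG_ON(AMDGPU_MMHUB0(0) != 8);	/* was AMDGPU_MMHUB_0 == 1 */
	BUILD_BUG_ON(AMDGPU_MMHUB1(0) != 12);	/* was AMDGPU_MMHUB_1 == 2 */
}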
vm); struct amdgpu_bo_param bp; struct amdgpu_bo *bo; struct dma_resv *resv; @@ -512,7 +513,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + + if (!adev->gmc.is_app_apu) + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + else + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; @@ -529,6 +535,8 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.type = ttm_bo_type_kernel; bp.no_wait_gpu = immediate; + bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; + if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv; @@ -553,6 +561,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.type = ttm_bo_type_kernel; bp.resv = bo->tbo.base.resv; bp.bo_ptr_size = sizeof(struct amdgpu_bo); + bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); @@ -780,13 +789,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, uint64_t pe, uint64_t addr, unsigned int count, uint32_t incr, uint64_t flags) - { + struct amdgpu_device *adev = params->adev; + if (level != AMDGPU_VM_PTB) { flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags); - } else if (params->adev->asic_type >= CHIP_VEGA10 && + } else if (adev->asic_type >= CHIP_VEGA10 && !(flags & AMDGPU_PTE_VALID) && !(flags & AMDGPU_PTE_PRT)) { @@ -794,6 +804,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } + /* APUs mapping system memory may need different MTYPEs on different + * NUMA nodes. Only do this for contiguous ranges that can be assumed + * to be on the same NUMA node. 
+ */ + if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) && + adev->gmc.gmc_funcs->override_vm_pte_flags && + num_possible_nodes() > 1) { + if (!params->pages_addr) + amdgpu_gmc_override_vm_pte_flags(adev, params->vm, + addr, &flags); + else + dev_dbg(adev->dev, + "override_vm_pte_flags skipped: non-contiguous\n"); + } + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index afacfb9b5bf6..c7085a747b03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -370,6 +370,45 @@ out: return ret; } +static void amdgpu_dummy_vram_mgr_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + DRM_DEBUG_DRIVER("Dummy vram mgr debug\n"); +} + +static bool amdgpu_dummy_vram_mgr_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + DRM_DEBUG_DRIVER("Dummy vram mgr compatible\n"); + return false; +} + +static bool amdgpu_dummy_vram_mgr_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + DRM_DEBUG_DRIVER("Dummy vram mgr intersects\n"); + return true; +} + +static void amdgpu_dummy_vram_mgr_del(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + DRM_DEBUG_DRIVER("Dummy vram mgr deleted\n"); +} + +static int amdgpu_dummy_vram_mgr_new(struct ttm_resource_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + DRM_DEBUG_DRIVER("Dummy vram mgr new\n"); + return -ENOSPC; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -818,6 +857,14 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); } +static const struct ttm_resource_manager_func amdgpu_dummy_vram_mgr_func = { + .alloc = amdgpu_dummy_vram_mgr_new, + .free = amdgpu_dummy_vram_mgr_del, + .intersects = amdgpu_dummy_vram_mgr_intersects, + .compatible = amdgpu_dummy_vram_mgr_compatible, + .debug = amdgpu_dummy_vram_mgr_debug +}; + static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .alloc = amdgpu_vram_mgr_new, .free = amdgpu_vram_mgr_del, @@ -842,17 +889,22 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); - man->func = &amdgpu_vram_mgr_func; - - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); mgr->default_page_size = PAGE_SIZE; + if (!adev->gmc.is_app_apu) { + man->func = &amdgpu_vram_mgr_func; + + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + } else { + man->func = &amdgpu_dummy_vram_mgr_func; + DRM_INFO("Setup dummy vram mgr\n"); + } + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); return 0; @@ -887,7 +939,8 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) drm_buddy_free_list(&mgr->mm, &rsv->allocated); kfree(rsv); } - drm_buddy_fini(&mgr->mm); + if (!adev->gmc.is_app_apu) + drm_buddy_fini(&mgr->mm); mutex_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c new file mode 100644 index 000000000000..d733fa6e7477 --- /dev/null +++ 
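/*
 * Illustrative sketch, not part of the patch: what the dummy VRAM manager
 * above means in practice on app APUs (adev->gmc.is_app_apu).  Its .alloc
 * hook always returns -ENOSPC, so any request for a VRAM placement fails and
 * buffers have to live in GTT; code added elsewhere in this series, e.g.
 * amdgpu_vm_pt_create(), simply picks AMDGPU_GEM_DOMAIN_GTT up front instead
 * of relying on that failure.  A hypothetical caller following the same
 * pattern:
 */
static int example_create_bo_app_apu(struct amdgpu_device *adev,
				     struct amdgpu_bo_param *bp,
				     struct amdgpu_bo **bo)
{
	bp->domain = adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_GTT :
					    AMDGPU_GEM_DOMAIN_VRAM;
	return amdgpu_bo_create(adev, bp, bo);
}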
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -0,0 +1,399 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_xcp.h" +#include "amdgpu_drv.h" + +#include <drm/drm_drv.h> +#include "../amdxcp/amdgpu_xcp_drv.h" + +static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr, + struct amdgpu_xcp_ip *xcp_ip, int xcp_state) +{ + int (*run_func)(void *handle, uint32_t inst_mask); + int ret = 0; + + if (!xcp_ip || !xcp_ip->valid || !xcp_ip->ip_funcs) + return 0; + + run_func = NULL; + + switch (xcp_state) { + case AMDGPU_XCP_PREPARE_SUSPEND: + run_func = xcp_ip->ip_funcs->prepare_suspend; + break; + case AMDGPU_XCP_SUSPEND: + run_func = xcp_ip->ip_funcs->suspend; + break; + case AMDGPU_XCP_PREPARE_RESUME: + run_func = xcp_ip->ip_funcs->prepare_resume; + break; + case AMDGPU_XCP_RESUME: + run_func = xcp_ip->ip_funcs->resume; + break; + } + + if (run_func) + ret = run_func(xcp_mgr->adev, xcp_ip->inst_mask); + + return ret; +} + +static int amdgpu_xcp_run_transition(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + int state) +{ + struct amdgpu_xcp_ip *xcp_ip; + struct amdgpu_xcp *xcp; + int i, ret; + + if (xcp_id >= MAX_XCP || !xcp_mgr->xcp[xcp_id].valid) + return -EINVAL; + + xcp = &xcp_mgr->xcp[xcp_id]; + for (i = 0; i < AMDGPU_XCP_MAX_BLOCKS; ++i) { + xcp_ip = &xcp->ip[i]; + ret = __amdgpu_xcp_run(xcp_mgr, xcp_ip, state); + if (ret) + break; + } + + return ret; +} + +int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id) +{ + return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, + AMDGPU_XCP_PREPARE_SUSPEND); +} + +int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id) +{ + return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_SUSPEND); +} + +int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id) +{ + return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, + AMDGPU_XCP_PREPARE_RESUME); +} + +int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id) +{ + return amdgpu_xcp_run_transition(xcp_mgr, xcp_id, AMDGPU_XCP_RESUME); +} + +static void __amdgpu_xcp_add_block(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + struct amdgpu_xcp_ip *ip) +{ + struct amdgpu_xcp *xcp; + + if (!ip) + return; + + xcp = &xcp_mgr->xcp[xcp_id]; + xcp->ip[ip->ip_id] = *ip; + xcp->ip[ip->ip_id].valid = true; + + xcp->valid = true; +} + +int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) +{ + struct amdgpu_device *adev = 
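/*
 * Illustrative sketch, not part of the patch: __amdgpu_xcp_run() above
 * dispatches one of four per-IP callbacks and passes the instance mask that
 * belongs to the partition.  An IP block would plug in a table like the
 * hypothetical one below (the real tables for GFX/SDMA/VCN live in their
 * respective files); unset callbacks are simply skipped.
 */
static int example_ip_suspend(void *handle, uint32_t inst_mask)
{
	/* suspend only the instances whose bits are set in inst_mask */
	return 0;
}

static struct amdgpu_xcp_ip_funcs example_xcp_ip_funcs = {
	.prepare_suspend = NULL,
	.suspend = example_ip_suspend,
	.prepare_resume = NULL,
	.resume = NULL,
};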
xcp_mgr->adev; + struct amdgpu_xcp_ip ip; + uint8_t mem_id; + int i, j, ret; + + if (!num_xcps || num_xcps > MAX_XCP) + return -EINVAL; + + xcp_mgr->mode = mode; + + for (i = 0; i < MAX_XCP; ++i) + xcp_mgr->xcp[i].valid = false; + + for (i = 0; i < num_xcps; ++i) { + for (j = AMDGPU_XCP_GFXHUB; j < AMDGPU_XCP_MAX_BLOCKS; ++j) { + ret = xcp_mgr->funcs->get_ip_details(xcp_mgr, i, j, + &ip); + if (ret) + continue; + + __amdgpu_xcp_add_block(xcp_mgr, i, &ip); + } + + xcp_mgr->xcp[i].id = i; + + if (xcp_mgr->funcs->get_xcp_mem_id) { + ret = xcp_mgr->funcs->get_xcp_mem_id( + xcp_mgr, &xcp_mgr->xcp[i], &mem_id); + if (ret) + continue; + else + xcp_mgr->xcp[i].mem_id = mem_id; + } + } + + xcp_mgr->num_xcps = num_xcps; + amdgpu_xcp_update_partition_sched_list(adev); + + xcp_mgr->num_xcp_per_mem_partition = num_xcps / xcp_mgr->adev->gmc.num_mem_partitions; + return 0; +} + +int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode) +{ + int ret, curr_mode, num_xcps = 0; + + if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE) + return -EINVAL; + + if (xcp_mgr->mode == mode) + return 0; + + if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode) + return 0; + + mutex_lock(&xcp_mgr->xcp_lock); + + curr_mode = xcp_mgr->mode; + /* State set to transient mode */ + xcp_mgr->mode = AMDGPU_XCP_MODE_TRANS; + + ret = xcp_mgr->funcs->switch_partition_mode(xcp_mgr, mode, &num_xcps); + + if (ret) { + /* Failed, get whatever mode it's at now */ + if (xcp_mgr->funcs->query_partition_mode) + xcp_mgr->mode = amdgpu_xcp_query_partition_mode( + xcp_mgr, AMDGPU_XCP_FL_LOCKED); + else + xcp_mgr->mode = curr_mode; + + goto out; + } + +out: + mutex_unlock(&xcp_mgr->xcp_lock); + + return ret; +} + +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int mode; + + if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return xcp_mgr->mode; + + if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + return xcp_mgr->mode; + + if (!(flags & AMDGPU_XCP_FL_LOCKED)) + mutex_lock(&xcp_mgr->xcp_lock); + mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode) + dev_WARN( + xcp_mgr->adev->dev, + "Cached partition mode %d not matching with device mode %d", + xcp_mgr->mode, mode); + + if (!(flags & AMDGPU_XCP_FL_LOCKED)) + mutex_unlock(&xcp_mgr->xcp_lock); + + return mode; +} + +static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev) +{ + struct drm_device *p_ddev; + struct drm_device *ddev; + int i, ret; + + ddev = adev_to_drm(adev); + + for (i = 0; i < MAX_XCP; i++) { + ret = amdgpu_xcp_drm_dev_alloc(&p_ddev); + if (ret) + return ret; + + /* Redirect all IOCTLs to the primary device */ + adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev; + adev->xcp_mgr->xcp[i].pdev = p_ddev->primary->dev; + adev->xcp_mgr->xcp[i].driver = (struct drm_driver *)p_ddev->driver; + adev->xcp_mgr->xcp[i].vma_offset_manager = p_ddev->vma_offset_manager; + p_ddev->render->dev = ddev; + p_ddev->primary->dev = ddev; + p_ddev->vma_offset_manager = ddev->vma_offset_manager; + p_ddev->driver = &amdgpu_partition_driver; + adev->xcp_mgr->xcp[i].ddev = p_ddev; + } + + return 0; +} + +int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, + int init_num_xcps, + struct amdgpu_xcp_mgr_funcs *xcp_funcs) +{ + struct amdgpu_xcp_mgr *xcp_mgr; + + if (!xcp_funcs || !xcp_funcs->switch_partition_mode || + !xcp_funcs->get_ip_details) + return -EINVAL; + + xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL); + + if (!xcp_mgr) + return -ENOMEM; + + 
xcp_mgr->adev = adev; + xcp_mgr->funcs = xcp_funcs; + xcp_mgr->mode = init_mode; + mutex_init(&xcp_mgr->xcp_lock); + + if (init_mode != AMDGPU_XCP_MODE_NONE) + amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode); + + adev->xcp_mgr = xcp_mgr; + + return amdgpu_xcp_dev_alloc(adev); +} + +int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr, + enum AMDGPU_XCP_IP_BLOCK ip, int instance) +{ + struct amdgpu_xcp *xcp; + int i, id_mask = 0; + + if (ip >= AMDGPU_XCP_MAX_BLOCKS) + return -EINVAL; + + for (i = 0; i < xcp_mgr->num_xcps; ++i) { + xcp = &xcp_mgr->xcp[i]; + if ((xcp->valid) && (xcp->ip[ip].valid) && + (xcp->ip[ip].inst_mask & BIT(instance))) + id_mask |= BIT(i); + } + + if (!id_mask) + id_mask = -ENXIO; + + return id_mask; +} + +int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp, + enum AMDGPU_XCP_IP_BLOCK ip, + uint32_t *inst_mask) +{ + if (!xcp->valid || !inst_mask || !(xcp->ip[ip].valid)) + return -EINVAL; + + *inst_mask = xcp->ip[ip].inst_mask; + + return 0; +} + +int amdgpu_xcp_dev_register(struct amdgpu_device *adev, + const struct pci_device_id *ent) +{ + int i, ret; + + if (!adev->xcp_mgr) + return 0; + + for (i = 0; i < MAX_XCP; i++) { + ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data); + if (ret) + return ret; + } + + return 0; +} + +void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev) +{ + struct drm_device *p_ddev; + int i; + + if (!adev->xcp_mgr) + return; + + for (i = 0; i < MAX_XCP; i++) { + p_ddev = adev->xcp_mgr->xcp[i].ddev; + drm_dev_unplug(p_ddev); + p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev; + p_ddev->primary->dev = adev->xcp_mgr->xcp[i].pdev; + p_ddev->driver = adev->xcp_mgr->xcp[i].driver; + p_ddev->vma_offset_manager = adev->xcp_mgr->xcp[i].vma_offset_manager; + } +} + +int amdgpu_xcp_open_device(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, + struct drm_file *file_priv) +{ + int i; + + if (!adev->xcp_mgr) + return 0; + + fpriv->xcp_id = ~0; + for (i = 0; i < MAX_XCP; ++i) { + if (!adev->xcp_mgr->xcp[i].ddev) + break; + + if (file_priv->minor == adev->xcp_mgr->xcp[i].ddev->render) { + if (adev->xcp_mgr->xcp[i].valid == FALSE) { + dev_err(adev->dev, "renderD%d partition %d not valid!", + file_priv->minor->index, i); + return -ENOENT; + } + dev_dbg(adev->dev, "renderD%d partition %d opened!", + file_priv->minor->index, i); + fpriv->xcp_id = i; + break; + } + } + + fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 : + adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id; + return 0; +} + +void amdgpu_xcp_release_sched(struct amdgpu_device *adev, + struct amdgpu_ctx_entity *entity) +{ + struct drm_gpu_scheduler *sched; + struct amdgpu_ring *ring; + + if (!adev->xcp_mgr) + return; + + sched = entity->entity.rq->sched; + if (sched->ready) { + ring = to_amdgpu_ring(entity->entity.rq->sched); + atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt); + } +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h new file mode 100644 index 000000000000..0f8026d64ea5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -0,0 +1,182 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
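/*
 * Illustrative sketch, not part of the patch: driving the per-partition
 * transitions exposed above.  The loop below is hypothetical; it walks the
 * configured partitions and runs the two-step suspend sequence on each.
 */
static int example_suspend_all_xcps(struct amdgpu_device *adev)
{
	int i, r;

	if (!adev->xcp_mgr)
		return 0;

	for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
		r = amdgpu_xcp_prepare_suspend(adev->xcp_mgr, i);
		if (r)
			return r;

		r = amdgpu_xcp_suspend(adev->xcp_mgr, i);
		if (r)
			return r;
	}

	return 0;
}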
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_XCP_H +#define AMDGPU_XCP_H + +#include <linux/pci.h> +#include <linux/xarray.h> + +#include "amdgpu_ctx.h" + +#define MAX_XCP 8 + +#define AMDGPU_XCP_MODE_NONE -1 +#define AMDGPU_XCP_MODE_TRANS -2 + +#define AMDGPU_XCP_FL_NONE 0 +#define AMDGPU_XCP_FL_LOCKED (1 << 0) + +struct amdgpu_fpriv; + +enum AMDGPU_XCP_IP_BLOCK { + AMDGPU_XCP_GFXHUB, + AMDGPU_XCP_GFX, + AMDGPU_XCP_SDMA, + AMDGPU_XCP_VCN, + AMDGPU_XCP_MAX_BLOCKS +}; + +enum AMDGPU_XCP_STATE { + AMDGPU_XCP_PREPARE_SUSPEND, + AMDGPU_XCP_SUSPEND, + AMDGPU_XCP_PREPARE_RESUME, + AMDGPU_XCP_RESUME, +}; + +struct amdgpu_xcp_ip_funcs { + int (*prepare_suspend)(void *handle, uint32_t inst_mask); + int (*suspend)(void *handle, uint32_t inst_mask); + int (*prepare_resume)(void *handle, uint32_t inst_mask); + int (*resume)(void *handle, uint32_t inst_mask); +}; + +struct amdgpu_xcp_ip { + struct amdgpu_xcp_ip_funcs *ip_funcs; + uint32_t inst_mask; + + enum AMDGPU_XCP_IP_BLOCK ip_id; + bool valid; +}; + +struct amdgpu_xcp { + struct amdgpu_xcp_ip ip[AMDGPU_XCP_MAX_BLOCKS]; + + uint8_t id; + uint8_t mem_id; + bool valid; + atomic_t ref_cnt; + struct drm_device *ddev; + struct drm_device *rdev; + struct drm_device *pdev; + struct drm_driver *driver; + struct drm_vma_offset_manager *vma_offset_manager; + struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; +}; + +struct amdgpu_xcp_mgr { + struct amdgpu_device *adev; + struct mutex xcp_lock; + struct amdgpu_xcp_mgr_funcs *funcs; + + struct amdgpu_xcp xcp[MAX_XCP]; + uint8_t num_xcps; + int8_t mode; + + /* Used to determine KFD memory size limits per XCP */ + unsigned int num_xcp_per_mem_partition; +}; + +struct amdgpu_xcp_mgr_funcs { + int (*switch_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr, int mode, + int *num_xcps); + int (*query_partition_mode)(struct amdgpu_xcp_mgr *xcp_mgr); + int (*get_ip_details)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + enum AMDGPU_XCP_IP_BLOCK ip_id, + struct amdgpu_xcp_ip *ip); + int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr, + struct amdgpu_xcp *xcp, uint8_t *mem_id); + + int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); + int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); + int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); + int (*resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); + int (*select_scheds)(struct amdgpu_device *adev, + u32 hw_ip, u32 hw_prio, struct amdgpu_fpriv *fpriv, + 
unsigned int *num_scheds, struct drm_gpu_scheduler ***scheds); + int (*update_partition_sched_list)(struct amdgpu_device *adev); +}; + +int amdgpu_xcp_prepare_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); +int amdgpu_xcp_suspend(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); +int amdgpu_xcp_prepare_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); +int amdgpu_xcp_resume(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id); + +int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, + int init_xcps, struct amdgpu_xcp_mgr_funcs *xcp_funcs); +int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode); +int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); +int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode); +int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr, + enum AMDGPU_XCP_IP_BLOCK ip, int instance); + +int amdgpu_xcp_get_inst_details(struct amdgpu_xcp *xcp, + enum AMDGPU_XCP_IP_BLOCK ip, + uint32_t *inst_mask); + +int amdgpu_xcp_dev_register(struct amdgpu_device *adev, + const struct pci_device_id *ent); +void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev); +int amdgpu_xcp_open_device(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, + struct drm_file *file_priv); +void amdgpu_xcp_release_sched(struct amdgpu_device *adev, + struct amdgpu_ctx_entity *entity); + +#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \ + ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ + (adev)->xcp_mgr->funcs->select_scheds ? \ + (adev)->xcp_mgr->funcs->select_scheds((adev), (e), (c), (d), (x), (y)) : -ENOENT) +#define amdgpu_xcp_update_partition_sched_list(adev) \ + ((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \ + (adev)->xcp_mgr->funcs->update_partition_sched_list ? 
\ + (adev)->xcp_mgr->funcs->update_partition_sched_list(adev) : 0) + +static inline int amdgpu_xcp_get_num_xcp(struct amdgpu_xcp_mgr *xcp_mgr) +{ + if (!xcp_mgr) + return 1; + else + return xcp_mgr->num_xcps; +} + +static inline struct amdgpu_xcp * +amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from) +{ + if (!xcp_mgr) + return NULL; + + while (*from < MAX_XCP) { + if (xcp_mgr->xcp[*from].valid) + return &xcp_mgr->xcp[*from]; + ++(*from); + } + + return NULL; +} + +#define for_each_xcp(xcp_mgr, xcp, i) \ + for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \ + xcp = amdgpu_get_next_xcp(xcp_mgr, &i)) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 439925477fb8..03dc59cbe8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -325,6 +325,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, } +static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0; i < top->num_nodes; i++) + sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops); + + return sysfs_emit(buf, "%s\n", buf); +} + +static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0; i < top->num_nodes; i++) + sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links); + + return sysfs_emit(buf, "%s\n", buf); +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -361,6 +391,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); +static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); +static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -380,6 +412,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_error\n"); + /* Create xgmi num hops file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops); + if (ret) + pr_err("failed to create xgmi_num_hops\n"); + + /* Create xgmi num links file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links); + if (ret) + pr_err("failed to create xgmi_num_links\n"); /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (&adev->dev->kobj)) { @@ -407,6 +448,9 @@ remove_link: remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + device_remove_file(adev->dev, &dev_attr_xgmi_error); + device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); + device_remove_file(adev->dev, &dev_attr_xgmi_num_links); success: return ret; @@ -420,6 +464,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, &dev_attr_xgmi_device_id); device_remove_file(adev->dev, &dev_attr_xgmi_error); + device_remove_file(adev->dev, 
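/*
 * Illustrative sketch, not part of the patch: using the for_each_xcp()
 * iterator declared above in amdgpu_xcp.h.  The body is hypothetical; the
 * iterator skips invalid entries and hands back each valid partition in turn,
 * and does nothing when no XCP manager is present.
 */
static void example_dump_xcps(struct amdgpu_device *adev)
{
	struct amdgpu_xcp *xcp;
	int i;

	for_each_xcp(adev->xcp_mgr, xcp, i)
		dev_info(adev->dev, "xcp %u: mem_id %u\n", xcp->id, xcp->mem_id);
}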
&dev_attr_xgmi_num_hops); + device_remove_file(adev->dev, &dev_attr_xgmi_num_links); if (hive->kobj.parent != (&adev->dev->kobj)) sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); @@ -1014,7 +1060,8 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } /* Trigger XGMI/WAFL error */ -static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if) +static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, + void *inject_if, uint32_t instance_mask) { int ret = 0; struct ta_ras_trigger_error_input *block_info = @@ -1026,7 +1073,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *injec if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info); + ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 24d42d24e6a0..104a5ad8397d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -70,7 +70,6 @@ enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID_RLC_SRLS, AMD_SRIOV_UCODE_ID_MEC, AMD_SRIOV_UCODE_ID_MEC2, - AMD_SRIOV_UCODE_ID_IMU, AMD_SRIOV_UCODE_ID_SOS, AMD_SRIOV_UCODE_ID_ASD, AMD_SRIOV_UCODE_ID_TA_RAS, diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c new file mode 100644 index 000000000000..16471b81a1f5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -0,0 +1,658 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "amdgpu_xcp.h" +#include "gfx_v9_4_3.h" +#include "gfxhub_v1_2.h" +#include "sdma_v4_4_2.h" + +#define XCP_INST_MASK(num_inst, xcp_id) \ + (num_inst ? 
GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) + +#define AMDGPU_XCP_OPS_KFD (1 << 0) + +void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) +{ + int i; + + adev->doorbell_index.kiq = AMDGPU_DOORBELL_LAYOUT1_KIQ_START; + + adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START; + + adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END; + adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE; + + adev->doorbell_index.sdma_doorbell_range = 20; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->doorbell_index.sdma_engine[i] = + AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START + + i * (adev->doorbell_index.sdma_doorbell_range >> 1); + + adev->doorbell_index.ih = AMDGPU_DOORBELL_LAYOUT1_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL_LAYOUT1_VCN_START; + + adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; +} + +static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, + uint32_t inst_idx, struct amdgpu_ring *ring) +{ + int xcp_id; + enum AMDGPU_XCP_IP_BLOCK ip_blk; + uint32_t inst_mask; + + ring->xcp_id = ~0; + if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return; + + inst_mask = 1 << inst_idx; + + switch (ring->funcs->type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_RING_TYPE_COMPUTE: + case AMDGPU_RING_TYPE_KIQ: + ip_blk = AMDGPU_XCP_GFX; + break; + case AMDGPU_RING_TYPE_SDMA: + ip_blk = AMDGPU_XCP_SDMA; + break; + case AMDGPU_RING_TYPE_VCN_ENC: + case AMDGPU_RING_TYPE_VCN_JPEG: + ip_blk = AMDGPU_XCP_VCN; + if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + inst_mask = 1 << (inst_idx * 2); + break; + default: + DRM_ERROR("Not support ring type %d!", ring->funcs->type); + return; + } + + for (xcp_id = 0; xcp_id < adev->xcp_mgr->num_xcps; xcp_id++) { + if (adev->xcp_mgr->xcp[xcp_id].ip[ip_blk].inst_mask & inst_mask) { + ring->xcp_id = xcp_id; + break; + } + } +} + +static void aqua_vanjaram_xcp_gpu_sched_update( + struct amdgpu_device *adev, + struct amdgpu_ring *ring, + unsigned int sel_xcp_id) +{ + unsigned int *num_gpu_sched; + + num_gpu_sched = &adev->xcp_mgr->xcp[sel_xcp_id] + .gpu_sched[ring->funcs->type][ring->hw_prio].num_scheds; + adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[ring->funcs->type][ring->hw_prio] + .sched[(*num_gpu_sched)++] = &ring->sched; + DRM_DEBUG("%s :[%d] gpu_sched[%d][%d] = %d", ring->name, + sel_xcp_id, ring->funcs->type, + ring->hw_prio, *num_gpu_sched); +} + +static int aqua_vanjaram_xcp_sched_list_update( + struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i; + + for (i = 0; i < MAX_XCP; i++) { + atomic_set(&adev->xcp_mgr->xcp[i].ref_cnt, 0); + memset(adev->xcp_mgr->xcp[i].gpu_sched, 0, sizeof(adev->xcp_mgr->xcp->gpu_sched)); + } + + if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return 0; + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = adev->rings[i]; + if (!ring || !ring->sched.ready) + continue; + + aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); + + /* VCN is shared by two partitions under CPX MODE */ + if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); + } + + return 0; +} + +static 
int aqua_vanjaram_update_partition_sched_list(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->num_rings; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE || + ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + aqua_vanjaram_set_xcp_id(adev, ring->xcc_id, ring); + else + aqua_vanjaram_set_xcp_id(adev, ring->me, ring); + } + + return aqua_vanjaram_xcp_sched_list_update(adev); +} + +static int aqua_vanjaram_select_scheds( + struct amdgpu_device *adev, + u32 hw_ip, + u32 hw_prio, + struct amdgpu_fpriv *fpriv, + unsigned int *num_scheds, + struct drm_gpu_scheduler ***scheds) +{ + u32 sel_xcp_id; + int i; + + if (fpriv->xcp_id == ~0) { + u32 least_ref_cnt = ~0; + + fpriv->xcp_id = 0; + for (i = 0; i < adev->xcp_mgr->num_xcps; i++) { + u32 total_ref_cnt; + + total_ref_cnt = atomic_read(&adev->xcp_mgr->xcp[i].ref_cnt); + if (total_ref_cnt < least_ref_cnt) { + fpriv->xcp_id = i; + least_ref_cnt = total_ref_cnt; + } + } + } + sel_xcp_id = fpriv->xcp_id; + + if (adev->xcp_mgr->xcp[sel_xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds) { + *num_scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].num_scheds; + *scheds = adev->xcp_mgr->xcp[fpriv->xcp_id].gpu_sched[hw_ip][hw_prio].sched; + atomic_inc(&adev->xcp_mgr->xcp[sel_xcp_id].ref_cnt); + DRM_DEBUG("Selected partition #%d", sel_xcp_id); + } else { + DRM_ERROR("Failed to schedule partition #%d.", sel_xcp_id); + return -ENOENT; + } + + return 0; +} + +static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + int8_t inst) +{ + int8_t dev_inst; + + switch (block) { + case GC_HWIP: + case SDMA0_HWIP: + /* Both JPEG and VCN as JPEG is only alias of VCN */ + case VCN_HWIP: + dev_inst = adev->ip_map.dev_inst[block][inst]; + break; + default: + /* For rest of the IPs, no look up required. + * Assume 'logical instance == physical instance' for all configs. */ + dev_inst = inst; + break; + } + + return dev_inst; +} + +static uint32_t aqua_vanjaram_logical_to_dev_mask(struct amdgpu_device *adev, + enum amd_hw_ip_block_type block, + uint32_t mask) +{ + uint32_t dev_mask = 0; + int8_t log_inst, dev_inst; + + while (mask) { + log_inst = ffs(mask) - 1; + dev_inst = aqua_vanjaram_logical_to_dev_inst(adev, block, log_inst); + dev_mask |= (1 << dev_inst); + mask &= ~(1 << log_inst); + } + + return dev_mask; +} + +static void aqua_vanjaram_populate_ip_map(struct amdgpu_device *adev, + enum amd_hw_ip_block_type ip_block, + uint32_t inst_mask) +{ + int l = 0, i; + + while (inst_mask) { + i = ffs(inst_mask) - 1; + adev->ip_map.dev_inst[ip_block][l++] = i; + inst_mask &= ~(1 << i); + } + for (; l < HWIP_MAX_INSTANCE; l++) + adev->ip_map.dev_inst[ip_block][l] = -1; +} + +void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev) +{ + u32 ip_map[][2] = { + { GC_HWIP, adev->gfx.xcc_mask }, + { SDMA0_HWIP, adev->sdma.sdma_mask }, + { VCN_HWIP, adev->vcn.inst_mask }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(ip_map); ++i) + aqua_vanjaram_populate_ip_map(adev, ip_map[i][0], ip_map[i][1]); + + adev->ip_map.logical_to_dev_inst = aqua_vanjaram_logical_to_dev_inst; + adev->ip_map.logical_to_dev_mask = aqua_vanjaram_logical_to_dev_mask; +} + +/* Fixed pattern for smn addressing on different AIDs: + * bit[34]: indicate cross AID access + * bit[33:32]: indicate target AID id + * AID id range is 0 ~ 3 as maximum AID number is 4. 
+ */ +u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id) +{ + u64 ext_offset; + + /* local routing and bit[34:32] will be zeros */ + if (ext_id == 0) + return 0; + + /* Initiated from host, accessing to all non-zero aids are cross traffic */ + ext_offset = ((u64)(ext_id & 0x3) << 32) | (1ULL << 34); + + return ext_offset; +} + +static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + struct amdgpu_device *adev = xcp_mgr->adev; + + if (adev->nbio.funcs->get_compute_partition_mode) + mode = adev->nbio.funcs->get_compute_partition_mode(adev); + + return mode; +} + +static int __aqua_vanjaram_get_xcc_per_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int mode) +{ + int num_xcc, num_xcc_per_xcp = 0; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc; + break; + case AMDGPU_DPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + num_xcc_per_xcp = num_xcc / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + num_xcc_per_xcp = 1; + break; + } + + return num_xcc_per_xcp; +} + +static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + enum AMDGPU_XCP_IP_BLOCK ip_id, + struct amdgpu_xcp_ip *ip) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc_xcp, num_sdma_xcp, num_vcn_xcp; + int num_sdma, num_vcn; + + num_sdma = adev->sdma.num_instances; + num_vcn = adev->vcn.num_vcn_inst; + + switch (xcp_mgr->mode) { + case AMDGPU_SPX_PARTITION_MODE: + num_sdma_xcp = num_sdma; + num_vcn_xcp = num_vcn; + break; + case AMDGPU_DPX_PARTITION_MODE: + num_sdma_xcp = num_sdma / 2; + num_vcn_xcp = num_vcn / 2; + break; + case AMDGPU_TPX_PARTITION_MODE: + num_sdma_xcp = num_sdma / 3; + num_vcn_xcp = num_vcn / 3; + break; + case AMDGPU_QPX_PARTITION_MODE: + num_sdma_xcp = num_sdma / 4; + num_vcn_xcp = num_vcn / 4; + break; + case AMDGPU_CPX_PARTITION_MODE: + num_sdma_xcp = 2; + num_vcn_xcp = num_vcn ? 1 : 0; + break; + default: + return -EINVAL; + } + + num_xcc_xcp = adev->gfx.num_xcc_per_xcp; + + switch (ip_id) { + case AMDGPU_XCP_GFXHUB: + ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id); + ip->ip_funcs = &gfxhub_v1_2_xcp_funcs; + break; + case AMDGPU_XCP_GFX: + ip->inst_mask = XCP_INST_MASK(num_xcc_xcp, xcp_id); + ip->ip_funcs = &gfx_v9_4_3_xcp_funcs; + break; + case AMDGPU_XCP_SDMA: + ip->inst_mask = XCP_INST_MASK(num_sdma_xcp, xcp_id); + ip->ip_funcs = &sdma_v4_4_2_xcp_funcs; + break; + case AMDGPU_XCP_VCN: + ip->inst_mask = XCP_INST_MASK(num_vcn_xcp, xcp_id); + /* TODO : Assign IP funcs */ + break; + default: + return -EINVAL; + } + + ip->ip_id = ip_id; + + return 0; +} + +static enum amdgpu_gfx_partition +__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + + if (adev->gmc.num_mem_partitions == 1) + return AMDGPU_SPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == num_xcc) + return AMDGPU_CPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == num_xcc / 2) + return (adev->flags & AMD_IS_APU) ? 
AMDGPU_TPX_PARTITION_MODE : + AMDGPU_QPX_PARTITION_MODE; + + if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU)) + return AMDGPU_DPX_PARTITION_MODE; + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + +static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, + enum amdgpu_gfx_partition mode) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc, num_xccs_per_xcp; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (mode) { + case AMDGPU_SPX_PARTITION_MODE: + return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; + case AMDGPU_DPX_PARTITION_MODE: + return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0; + case AMDGPU_TPX_PARTITION_MODE: + return (adev->gmc.num_mem_partitions == 1 || + adev->gmc.num_mem_partitions == 3) && + ((num_xcc % 3) == 0); + case AMDGPU_QPX_PARTITION_MODE: + num_xccs_per_xcp = num_xcc / 4; + return (adev->gmc.num_mem_partitions == 1 || + adev->gmc.num_mem_partitions == 4) && + (num_xccs_per_xcp >= 2); + case AMDGPU_CPX_PARTITION_MODE: + return ((num_xcc > 1) && + (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) && + (num_xcc % adev->gmc.num_mem_partitions) == 0); + default: + return false; + } + + return false; +} + +static int __aqua_vanjaram_pre_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + /* TODO: + * Stop user queues and threads, and make sure GPU is empty of work. + */ + + if (flags & AMDGPU_XCP_OPS_KFD) + amdgpu_amdkfd_device_fini_sw(xcp_mgr->adev); + + return 0; +} + +static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) +{ + int ret = 0; + + if (flags & AMDGPU_XCP_OPS_KFD) { + amdgpu_amdkfd_device_probe(xcp_mgr->adev); + amdgpu_amdkfd_device_init(xcp_mgr->adev); + /* If KFD init failed, return failure */ + if (!xcp_mgr->adev->kfd.init_complete) + ret = -EIO; + } + + return ret; +} + +static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, + int mode, int *num_xcps) +{ + int num_xcc_per_xcp, num_xcc, ret; + struct amdgpu_device *adev; + u32 flags = 0; + + adev = xcp_mgr->adev; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) { + mode = __aqua_vanjaram_get_auto_mode(xcp_mgr); + } else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) { + dev_err(adev->dev, + "Invalid compute partition mode requested, requested: %s, available memory partitions: %d", + amdgpu_gfx_compute_mode_desc(mode), adev->gmc.num_mem_partitions); + return -EINVAL; + } + + if (adev->kfd.init_complete) + flags |= AMDGPU_XCP_OPS_KFD; + + if (flags & AMDGPU_XCP_OPS_KFD) { + ret = amdgpu_amdkfd_check_and_lock_kfd(adev); + if (ret) + goto out; + } + + ret = __aqua_vanjaram_pre_partition_switch(xcp_mgr, flags); + if (ret) + goto unlock; + + num_xcc_per_xcp = __aqua_vanjaram_get_xcc_per_xcp(xcp_mgr, mode); + if (adev->gfx.funcs->switch_partition_mode) + adev->gfx.funcs->switch_partition_mode(xcp_mgr->adev, + num_xcc_per_xcp); + + /* Init info about new xcps */ + *num_xcps = num_xcc / num_xcc_per_xcp; + amdgpu_xcp_init(xcp_mgr, *num_xcps, mode); + + ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags); +unlock: + if (flags & AMDGPU_XCP_OPS_KFD) + amdgpu_amdkfd_unlock_kfd(adev); +out: + return ret; +} + +static int __aqua_vanjaram_get_xcp_mem_id(struct amdgpu_device *adev, + int xcc_id, uint8_t *mem_id) +{ + /* memory/spatial modes validation check is already done */ + *mem_id = xcc_id / adev->gfx.num_xcc_per_xcp; + *mem_id /= adev->xcp_mgr->num_xcp_per_mem_partition; + + return 0; +} + 
+static int aqua_vanjaram_get_xcp_mem_id(struct amdgpu_xcp_mgr *xcp_mgr, + struct amdgpu_xcp *xcp, uint8_t *mem_id) +{ + struct amdgpu_numa_info numa_info; + struct amdgpu_device *adev; + uint32_t xcc_mask; + int r, i, xcc_id; + + adev = xcp_mgr->adev; + /* TODO: BIOS is not returning the right info now + * Check on this later + */ + /* + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + */ + if (adev->gmc.num_mem_partitions == 1) { + /* Only one range */ + *mem_id = 0; + return 0; + } + + r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &xcc_mask); + if (r || !xcc_mask) + return -EINVAL; + + xcc_id = ffs(xcc_mask) - 1; + if (!adev->gmc.is_app_apu) + return __aqua_vanjaram_get_xcp_mem_id(adev, xcc_id, mem_id); + + r = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + + if (r) + return r; + + r = -EINVAL; + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + if (adev->gmc.mem_partitions[i].numa.node == numa_info.nid) { + *mem_id = i; + r = 0; + break; + } + } + + return r; +} + +static int aqua_vanjaram_get_xcp_ip_details(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id, + enum AMDGPU_XCP_IP_BLOCK ip_id, + struct amdgpu_xcp_ip *ip) +{ + if (!ip) + return -EINVAL; + + return __aqua_vanjaram_get_xcp_ip_info(xcp_mgr, xcp_id, ip_id, ip); +} + +struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = { + .switch_partition_mode = &aqua_vanjaram_switch_partition_mode, + .query_partition_mode = &aqua_vanjaram_query_partition_mode, + .get_ip_details = &aqua_vanjaram_get_xcp_ip_details, + .get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id, + .select_scheds = &aqua_vanjaram_select_scheds, + .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list +}; + +static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) +{ + int ret; + + ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1, + &aqua_vanjaram_xcp_funcs); + if (ret) + return ret; + + /* TODO: Default memory node affinity init */ + + return ret; +} + +int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) +{ + u32 mask, inst_mask = adev->sdma.sdma_mask; + int ret, i; + + /* generally 1 AID supports 4 instances */ + adev->sdma.num_inst_per_aid = 4; + adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask); + + adev->aid_mask = i = 1; + inst_mask >>= adev->sdma.num_inst_per_aid; + + for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; + inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { + if ((inst_mask & mask) == mask) + adev->aid_mask |= (1 << i); + } + + /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be + * addressed based on logical instance ids. 
+ */ + adev->vcn.harvest_config = 0; + adev->vcn.num_inst_per_aid = 1; + adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask); + adev->jpeg.harvest_config = 0; + adev->jpeg.num_inst_per_aid = 1; + adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask); + + ret = aqua_vanjaram_xcp_mgr_init(adev); + if (ret) + return ret; + + aqua_vanjaram_ip_map_init(adev); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index de6d10390ab2..5641cf05d856 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1141,12 +1141,12 @@ static uint32_t cik_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 67d16236b216..52598fbc9b39 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -489,8 +489,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) #endif /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - - ring->sched.ready = true; } cik_sdma_enable(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ab44c1391d52..be984f8c71c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3490,7 +3490,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev); @@ -3568,7 +3568,7 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; - if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) { + if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); return; } @@ -3636,7 +3636,7 @@ static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs; + adev->gfx.kiq[0].pmf = &gfx_v10_0_kiq_pm4_funcs; } static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) @@ -4219,7 +4219,7 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr = NULL; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -4291,7 +4291,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx10 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -4318,7 +4318,7 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4329,7 +4329,7 @@ static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -4340,7 +4340,7 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { nv_grbm_select(adev, me, pipe, q, vm); } @@ -4461,7 +4461,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -4490,7 +4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX10_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4550,7 +4550,7 @@ static int gfx_v10_0_sw_init(void *handle) /* KIQ event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 
GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT, - &adev->gfx.kiq.irq); + &adev->gfx.kiq[0].irq); if (r) return r; @@ -4614,8 +4614,8 @@ static int gfx_v10_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, - j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v10_0_compute_ring_init(adev, ring_id, @@ -4629,19 +4629,19 @@ static int gfx_v10_0_sw_init(void *handle) } if (!adev->enable_mes_kiq) { - r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } - kiq = &adev->gfx.kiq; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + kiq = &adev->gfx.kiq[0]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); if (r) return r; } - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0); if (r) return r; @@ -4690,11 +4690,11 @@ static int gfx_v10_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); if (!adev->enable_mes_kiq) { - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); } gfx_v10_0_pfp_fini(adev); @@ -4712,7 +4712,7 @@ static int gfx_v10_0_sw_fini(void *handle) } static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -4772,13 +4772,13 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v10_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -4825,6 +4825,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade #define DEFAULT_SH_MEM_BASES (0x6000) +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev, + uint32_t first_vmid, + uint32_t last_vmid) +{ + uint32_t data; + uint32_t trap_config_vmid_mask = 0; + int i; + + /* Calculate trap config vmid mask */ + for (i = first_vmid; i < last_vmid; i++) + trap_config_vmid_mask |= (1 << i); + + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, + VMID_SEL, trap_config_vmid_mask); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + TRAP_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); +} + static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -4856,6 +4879,9 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 
WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); } + + gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid, + AMDGPU_NUM_VMID); } static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) @@ -4907,7 +4933,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); /* * Set corresponding TCP bits for the inactive WGPs in @@ -4940,7 +4966,7 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -4978,7 +5004,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -6073,7 +6099,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); @@ -6168,11 +6193,6 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v10_0_cp_gfx_start(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -6214,7 +6234,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) CP_MEC_CNTL__MEC_ME2_HALT_MASK)); break; } - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; } udelay(50); } @@ -6423,55 +6443,6 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -#ifdef BRING_UP_DEBUG -static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - struct v10_gfx_mqd *mqd = ring->mqd_ptr; - - /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); - - /* set GFX_MQD_BASE */ - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set GFX_MQD_CONTROL */ - WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); - - /* set GFX_HQD_VMID to 0 */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); - - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY, - mqd->cp_gfx_hqd_queue_priority); - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); - - /* set GFX_HQD_BASE, similar as CP_RB_BASE */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); - - /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); - - /* set GFX_HQD_CNTL, 
similar as CP_RB_CNTL */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); - - /* set RB_WPTR_POLL_ADDR */ - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); - - /* set RB_DOORBELL_CONTROL */ - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); - - /* active the queue */ - WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); - - return 0; -} -#endif - static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6492,59 +6463,23 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) gfx_v10_0_cp_gfx_set_doorbell(adev, ring); -#ifdef BRING_UP_DEBUG - gfx_v10_0_gfx_queue_init_register(ring); -#endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { - /* reset mqd with the backup copy */ + } else { + /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; amdgpu_ring_clear_ring(ring); -#ifdef BRING_UP_DEBUG - mutex_lock(&adev->srbm_mutex); - nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v10_0_gfx_queue_init_register(ring); - nv_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -#endif - } else { - amdgpu_ring_clear_ring(ring); } return 0; } -#ifndef BRING_UP_DEBUG -static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - int r, i; - - if (!kiq->pmf || !kiq->pmf->kiq_map_queues) - return -EINVAL; - - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * - adev->gfx.num_gfx_rings); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); - - return amdgpu_ring_test_helper(kiq_ring); -} -#endif - static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; @@ -6555,7 +6490,7 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { @@ -6565,23 +6500,14 @@ static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) } amdgpu_bo_unreserve(ring->mqd_obj); if (r) - goto done; + return r; } -#ifndef BRING_UP_DEBUG - r = gfx_v10_0_kiq_enable_kgq(adev); - if (r) - goto done; -#endif - r = gfx_v10_0_cp_gfx_start(adev); + + r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; + return r; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } -done: - return r; + return gfx_v10_0_cp_gfx_start(adev); } static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -6812,14 +6738,13 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_compute_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; gfx_v10_0_kiq_setting(ring); if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to 
a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -6841,8 +6766,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -6864,17 +6789,14 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; @@ -6885,7 +6807,7 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) @@ -6901,7 +6823,6 @@ static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } @@ -6929,7 +6850,7 @@ static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = amdgpu_gfx_enable_kcq(adev); + r = amdgpu_gfx_enable_kcq(adev, 0); done: return r; } @@ -7242,47 +7163,20 @@ static int gfx_v10_0_hw_init(void *handle) return r; } -#ifndef BRING_UP_DEBUG -static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - if (!adev->job_hang) - return amdgpu_ring_test_helper(kiq_ring); - else - return 0; -} -#endif - static int gfx_v10_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); if (!adev->no_hw_access) { -#ifndef BRING_UP_DEBUG if (amdgpu_async_gfx_ring) { - r = gfx_v10_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } -#endif - if (amdgpu_gfx_disable_kcq(adev)) + + if (amdgpu_gfx_disable_kcq(adev, 0)) DRM_ERROR("KCQ disable failed\n"); } @@ -7574,7 +7468,7 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; } -static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -7615,7 +7509,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -7962,7 +7856,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { /* enable FGCG firstly*/ @@ -8001,7 +7895,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_GFX_3D_CGLS)) gfx_v10_0_enable_gui_idle_interrupt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -8095,11 +7989,11 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v10_cntl_power_gating(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { @@ -8648,7 +8542,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; @@ -9156,7 +9050,7 @@ static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { uint32_t tmp, target; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); if (ring->me == 1) target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); @@ -9200,7 +9094,7 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -9377,7 +9271,7 @@ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v10_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx; @@ -9411,8 +9305,8 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs; - adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; - adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs; + adev->gfx.kiq[0].irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; + adev->gfx.kiq[0].irq.funcs = &gfx_v10_0_kiq_irq_funcs; adev->gfx.priv_reg_irq.num_types = 1; adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; @@ -9549,7 +9443,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, 
disable_masks[i * 2 + j]); @@ -9570,7 +9464,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c4940b6ea1c4..690e121d9dda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -112,7 +112,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance); + u32 sh_num, u32 instance, int xcc_id); static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); @@ -123,8 +123,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, bool enable); @@ -192,7 +192,7 @@ static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, struct amdgpu_device *adev = kiq_ring->adev; uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; - if (adev->enable_mes && !adev->gfx.kiq.ring.sched.ready) { + if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); return; } @@ -260,7 +260,7 @@ static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq.pmf = &gfx_v11_0_kiq_pm4_funcs; + adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; } static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) @@ -463,6 +463,23 @@ out: return err; } +static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + case IP_VERSION(11, 0, 3): + if ((adev->gfx.me_fw_version >= 1505) && + (adev->gfx.pfp_fw_version >= 1600) && + (adev->gfx.mec_fw_version >= 512)) + adev->gfx.cp_gfx_shadow = true; + break; + default: + adev->gfx.cp_gfx_shadow = false; + break; + } +} + static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; @@ -539,6 +556,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) /* only one MEC for gfx 11.0.0. 
*/ adev->gfx.mec2_fw = NULL; + gfx_v11_0_check_fw_cp_gfx_shadow(adev); out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -699,7 +717,7 @@ static int gfx_v11_0_mec_init(struct amdgpu_device *adev) u32 *hpd; size_t mec_hpd_size; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -747,7 +765,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); } -static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* in gfx11 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be @@ -773,7 +791,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); } -static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -784,7 +802,7 @@ static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, dst); } -static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -795,11 +813,32 @@ static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc21_grbm_select(adev, me, pipe, q, vm); } +/* all sizes are in bytes */ +#define MQD_SHADOW_BASE_SIZE 73728 +#define MQD_SHADOW_BASE_ALIGNMENT 256 +#define MQD_FWWORKAREA_SIZE 484 +#define MQD_FWWORKAREA_ALIGNMENT 256 + +static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, + struct amdgpu_gfx_shadow_info *shadow_info) +{ + if (adev->gfx.cp_gfx_shadow) { + shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; + shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; + shadow_info->csa_size = MQD_FWWORKAREA_SIZE; + shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; + return 0; + } else { + memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); + return -ENOTSUPP; + } +} + static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, .select_se_sh = &gfx_v11_0_select_se_sh, @@ -808,6 +847,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, + .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) @@ -866,7 +906,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); 
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -897,7 +937,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -1367,8 +1407,8 @@ static int gfx_v11_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, - j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v11_0_compute_ring_init(adev, ring_id, @@ -1382,19 +1422,19 @@ static int gfx_v11_0_sw_init(void *handle) } if (!adev->enable_mes_kiq) { - r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } - kiq = &adev->gfx.kiq; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + kiq = &adev->gfx.kiq[0]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); if (r) return r; } - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); if (r) return r; @@ -1456,11 +1496,11 @@ static int gfx_v11_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); if (!adev->enable_mes_kiq) { - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); } gfx_v11_0_pfp_fini(adev); @@ -1477,7 +1517,7 @@ static int gfx_v11_0_sw_fini(void *handle) } static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, - u32 sh_num, u32 instance) + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -1598,6 +1638,7 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) /* Enable trap for each kfd vmid. 
*/ data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -1667,7 +1708,7 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc21_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -3188,7 +3229,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -3280,11 +3320,6 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v11_0_cp_gfx_start(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -3330,8 +3365,6 @@ static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); } - adev->gfx.kiq.ring.sched.ready = enable; - udelay(50); } @@ -3633,55 +3666,6 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, return 0; } -#ifdef BRING_UP_DEBUG -static int gfx_v11_0_gfx_queue_init_register(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - struct v11_gfx_mqd *mqd = ring->mqd_ptr; - - /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); - - /* set GFX_MQD_BASE */ - WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); - WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set GFX_MQD_CONTROL */ - WREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); - - /* set GFX_HQD_VMID to 0 */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); - - WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY, - mqd->cp_gfx_hqd_queue_priority); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); - - /* set GFX_HQD_BASE, similar as CP_RB_BASE */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); - - /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); - WREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); - - /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); - - /* set RB_WPTR_POLL_ADDR */ - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); - - /* set RB_DOORBELL_CONTROL */ - WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); - - /* active the queue */ - WREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); - - return 0; -} -#endif - static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -3693,59 +3677,23 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) 
mutex_lock(&adev->srbm_mutex); soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); amdgpu_ring_init_mqd(ring); -#ifdef BRING_UP_DEBUG - gfx_v11_0_gfx_queue_init_register(ring); -#endif soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { - /* reset mqd with the backup copy */ + } else { + /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; amdgpu_ring_clear_ring(ring); -#ifdef BRING_UP_DEBUG - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v11_0_gfx_queue_init_register(ring); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -#endif - } else { - amdgpu_ring_clear_ring(ring); } return 0; } -#ifndef BRING_UP_DEBUG -static int gfx_v11_0_kiq_enable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - int r, i; - - if (!kiq->pmf || !kiq->pmf->kiq_map_queues) - return -EINVAL; - - r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * - adev->gfx.num_gfx_rings); - if (r) { - DRM_ERROR("Failed to lock KIQ (%d).\n", r); - return r; - } - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); - - return amdgpu_ring_test_helper(kiq_ring); -} -#endif - static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) { int r, i; @@ -3756,7 +3704,7 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { @@ -3766,23 +3714,14 @@ static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) } amdgpu_bo_unreserve(ring->mqd_obj); if (r) - goto done; + return r; } -#ifndef BRING_UP_DEBUG - r = gfx_v11_0_kiq_enable_kgq(adev); - if (r) - goto done; -#endif - r = gfx_v11_0_cp_gfx_start(adev); + + r = amdgpu_gfx_enable_kgq(adev, 0); if (r) - goto done; + return r; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } -done: - return r; + return gfx_v11_0_cp_gfx_start(adev); } static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, @@ -4028,14 +3967,13 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v11_compute_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; gfx_v11_0_kiq_setting(ring); if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4057,8 +3995,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -4080,17 +4018,14 @@ static int 
gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; @@ -4101,7 +4036,7 @@ static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) @@ -4146,7 +4081,7 @@ static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = amdgpu_gfx_enable_kcq(adev); + r = amdgpu_gfx_enable_kcq(adev, 0); done: return r; } @@ -4239,7 +4174,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) false : true; adev->gfxhub.funcs->set_fault_enable_default(adev, value); - amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); return 0; } @@ -4407,48 +4342,20 @@ static int gfx_v11_0_hw_init(void *handle) return r; } -#ifndef BRING_UP_DEBUG -static int gfx_v11_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq.ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} -#endif - static int gfx_v11_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); if (!adev->no_hw_access) { -#ifndef BRING_UP_DEBUG if (amdgpu_async_gfx_ring) { - r = gfx_v11_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } -#endif - if (amdgpu_gfx_disable_kcq(adev)) + + if (amdgpu_gfx_disable_kcq(adev, 0)) DRM_ERROR("KCQ disable failed\n"); amdgpu_mes_kiq_hw_fini(adev); @@ -4525,7 +4432,7 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_set_safe_mode(adev); + gfx_v11_0_set_safe_mode(adev, 0); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { @@ -4625,7 +4532,7 @@ static int gfx_v11_0_soft_reset(void *handle) tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - gfx_v11_0_unset_safe_mode(adev); + gfx_v11_0_unset_safe_mode(adev, 0); return gfx_v11_0_cp_resume(adev); } @@ -4794,7 +4701,7 @@ static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; } -static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4813,7 +4720,7 @@ static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); } @@ -5041,7 +4948,7 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); @@ -5061,7 +4968,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_GFX_3D_CGLS)) gfx_v11_0_enable_gui_idle_interrupt(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5129,11 +5036,11 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); gfx_v11_cntl_power_gating(adev, enable); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v11_0_set_powergating_state(void *handle, @@ -5592,6 +5499,29 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, + u64 shadow_va, u64 csa_va, + u64 gds_va, bool init_shadow, + int vmid) +{ + struct amdgpu_device *adev = ring->adev; + + if (!adev->gfx.cp_gfx_shadow) + return; + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); + amdgpu_ring_write(ring, lower_32_bits(shadow_va)); + amdgpu_ring_write(ring, upper_32_bits(shadow_va)); + amdgpu_ring_write(ring, lower_32_bits(gds_va)); + amdgpu_ring_write(ring, upper_32_bits(gds_va)); + amdgpu_ring_write(ring, lower_32_bits(csa_va)); + amdgpu_ring_write(ring, upper_32_bits(csa_va)); + amdgpu_ring_write(ring, shadow_va ? + PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); + amdgpu_ring_write(ring, init_shadow ? 
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); +} + static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) { unsigned ret; @@ -5623,7 +5553,7 @@ static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; @@ -6091,7 +6021,7 @@ static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { uint32_t tmp, target; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); target += ring->pipe; @@ -6182,6 +6112,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .set_wptr = gfx_v11_0_ring_set_wptr_gfx, .emit_frame_size = /* totally 242 maximum if 16 IBs */ 5 + /* COND_EXEC */ + 9 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + @@ -6208,6 +6139,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, + .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, @@ -6288,7 +6220,7 @@ static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v11_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; @@ -6437,7 +6369,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; counter = 0; - gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 8 && j < 2) gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); @@ -6469,7 +6401,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, active_cu_number += counter; } } - gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 068b9586a223..26d6286d86c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -84,8 +84,20 @@ static int gfx_v11_0_3_poison_consumption_handler(struct amdgpu_device *adev, /* Workaround: when vmid and pasid are both zero, trigger gpu reset in KGD. 
 	 */
 	if (entry && (entry->client_id == SOC21_IH_CLIENTID_GFX) &&
 	    (entry->src_id == GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT) &&
-	    !entry->vmid && !entry->pasid)
+	    !entry->vmid && !entry->pasid) {
+		uint32_t rlc_status0 = 0;
+
+		rlc_status0 = RREG32_SOC15(GC, 0, regRLC_RLCS_FED_STATUS_0);
+
+		if (REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA0_FED_ERR) ||
+		    REG_GET_FIELD(rlc_status0, RLC_RLCS_FED_STATUS_0, SDMA1_FED_ERR)) {
+			struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+			ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+		}
+
 		amdgpu_ras_reset_gpu(adev);
+	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index c41219e23151..da6caff78c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1285,7 +1285,7 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
 }
 static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-				  u32 sh_num, u32 instance)
+				  u32 sh_num, u32 instance, int xcc_id)
 {
 	u32 data;
@@ -1438,12 +1438,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 	}
 	/* GRBM_GFX_INDEX has a different offset on SI */
-	gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+	gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
 	WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
 	}
 	/* GRBM_GFX_INDEX has a different offset on SI */
-	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 }
 static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
@@ -1459,14 +1459,14 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
 			data = gfx_v6_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 	adev->gfx.config.backend_enable_mask = active_rbs;
 	adev->gfx.config.num_rbs = hweight32(active_rbs);
@@ -1487,7 +1487,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 	/* cache the values for userspace */
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
 				RREG32(mmCC_RB_BACKEND_DISABLE);
 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
@@ -1496,7 +1496,7 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 				RREG32(mmPA_SC_RASTER_CONFIG);
 		}
 	}
-	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
@@ -1535,7 +1535,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
 			data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
 			active_cu = gfx_v6_0_get_cu_enabled(adev);
@@ -1550,7
+1550,7 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) } } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -2391,7 +2391,7 @@ static void gfx_v6_0_enable_lbpw(struct amdgpu_device *adev, bool enable) WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); if (!enable) { - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmSPI_LB_CU_MASK, 0x00ff); } } @@ -2968,7 +2968,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -2993,7 +2993,7 @@ static void gfx_v6_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -3003,7 +3003,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { DRM_INFO("Not implemented\n"); } @@ -3028,6 +3028,7 @@ static int gfx_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), GFX6_NUM_COMPUTE_RINGS); @@ -3073,7 +3074,7 @@ static int gfx_v6_0_sw_init(void *handle) ring = &adev->gfx.gfx_ring[i]; ring->ring_obj = NULL; sprintf(ring->name, "gfx"); - r = amdgpu_ring_init(adev, ring, 1024, + r = amdgpu_ring_init(adev, ring, 2048, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -3571,7 +3572,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -3593,7 +3594,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) } } - gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 9d5c1e29b4a3..8c174c11eaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1548,11 +1548,12 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * @sh_num: sh block to address * @instance: Certain registers are instanced per SE or SH. * 0xffffffff means broadcast to all SEs or SHs (CIK). - * + * @xcc_id: xcc accelerated compute core id * Select which SE, SH combinations to address. 
*/ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -1732,13 +1733,13 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on CI+ */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } /** @@ -1761,13 +1762,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v7_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -1790,7 +1791,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -1801,7 +1802,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1911,7 +1912,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -2728,7 +2729,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) u32 *hpd; size_t mec_hpd_size; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -3301,7 +3302,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3309,7 +3310,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 
0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3361,7 +3362,7 @@ static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { u32 tmp, i, mask; @@ -3383,7 +3384,7 @@ static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { u32 tmp; @@ -3474,7 +3475,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(mmRLC_LB_PARAMS, 0x00600408); WREG32(mmRLC_LB_CNTL, 0x80000004); @@ -3530,7 +3531,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3584,7 +3585,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3635,7 +3636,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; @@ -4111,7 +4112,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -4136,7 +4137,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -4146,7 +4147,7 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + 
u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { cik_srbm_select(adev, me, pipe, q, vm); } @@ -4178,6 +4179,7 @@ static int gfx_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); @@ -4456,7 +4458,8 @@ static int gfx_v7_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v7_0_compute_ring_init(adev, @@ -5114,7 +5117,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -5135,7 +5138,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b1f2684d854a..51c1745c8369 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1304,7 +1304,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) u32 *hpd; size_t mec_hpd_size; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -2001,7 +2001,8 @@ static int gfx_v8_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v8_0_compute_ring_init(adev, @@ -2015,19 +2016,19 @@ static int gfx_v8_0_sw_init(void *handle) } } - r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } - kiq = &adev->gfx.kiq; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + kiq = &adev->gfx.kiq[0]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); if (r) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0); if (r) return r; @@ -2050,9 +2051,9 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); gfx_v8_0_mec_fini(adev); amdgpu_gfx_rlc_fini(adev); @@ -3394,7 +3395,8 @@ static void gfx_v8_0_tiling_mode_table_init(struct 
amdgpu_device *adev) } static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, - u32 se_num, u32 sh_num, u32 instance) + u32 se_num, u32 sh_num, u32 instance, + int xcc_id) { u32 data; @@ -3417,7 +3419,7 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, } static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { vi_srbm_select(adev, me, pipe, q, vm); } @@ -3578,13 +3580,13 @@ gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); } /* GRBM_GFX_INDEX has a different offset on VI */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); } static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) @@ -3600,13 +3602,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v8_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); @@ -3629,7 +3631,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); adev->gfx.config.rb_config[i][j].rb_backend_disable = RREG32(mmCC_RB_BACKEND_DISABLE); adev->gfx.config.rb_config[i][j].user_rb_backend_disable = @@ -3640,7 +3642,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) RREG32(mmPA_SC_RASTER_CONFIG_1); } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -3787,7 +3789,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmPA_SC_FIFO_SIZE, (adev->gfx.config.sc_prim_fifo_size_frontend << @@ -3818,7 +3820,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3826,7 +3828,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { gfx_v8_0_select_se_sh(adev, 0xffffffff, 
- 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -3834,7 +3836,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -4281,7 +4283,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); - ring->sched.ready = true; return 0; } @@ -4292,7 +4293,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32(mmCP_MEC_CNTL, 0); } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; } udelay(50); } @@ -4314,12 +4315,12 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; uint64_t queue_mask = 0; int r, i; for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { - if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap)) continue; /* This situation may be hit in the future if a new HW @@ -4595,14 +4596,13 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct vi_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; gfx_v8_0_kiq_setting(ring); if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; @@ -4625,8 +4625,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation)); } return 0; @@ -4650,15 +4650,13 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; } @@ -4678,21 +4676,22 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) return r; r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); return r; + } 
gfx_v8_0_kiq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } @@ -4741,7 +4740,7 @@ static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev) if (r) return r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -4808,7 +4807,7 @@ static int gfx_v8_0_hw_init(void *handle) static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) { int r, i; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); if (r) @@ -4902,7 +4901,7 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (!gfx_v8_0_wait_for_idle(adev)) gfx_v8_0_cp_enable(adev, false); else @@ -4911,7 +4910,7 @@ static int gfx_v8_0_hw_fini(void *handle) adev->gfx.rlc.funcs->stop(adev); else pr_err("rlc is busy, skip halt rlc\n"); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5216,7 +5215,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32(mmSQ_IND_DATA); } -static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 0 wave data */ dst[(*no_fields)++] = 0; @@ -5241,7 +5240,7 @@ static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -5263,6 +5262,7 @@ static int gfx_v8_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.xcc_mask = 1; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); @@ -5376,7 +5376,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5430,7 +5430,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5481,7 +5481,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, { uint32_t data; - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); @@ -5535,7 +5535,7 @@ static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -5562,7 
+5562,7 @@ static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -5621,7 +5621,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -5717,7 +5717,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5727,7 +5727,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); @@ -5810,7 +5810,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) @@ -6723,11 +6723,11 @@ static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data, */ if (from_wq) { mutex_lock(&adev->grbm_idx_mutex); - gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); + gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0); sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -7001,7 +7001,7 @@ static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; @@ -7116,7 +7116,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) mask = 1; ao_bitmap = 0; counter = 0; - gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); + gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0); if (i < 4 && j < 2) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); @@ -7137,7 +7137,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a674c8a58dc2..65577eca58f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -760,7 +760,7 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, - void *inject_if); + void *inject_if, uint32_t instance_mask); static void 
gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, @@ -888,7 +888,7 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; + adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs; } static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) @@ -1494,7 +1494,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1513,7 +1513,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1535,7 +1535,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1584,7 +1584,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1703,7 +1703,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -1778,7 +1778,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; @@ -1799,7 +1799,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -1808,7 +1808,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, 
uint32_t start, uint32_t size, uint32_t *dst) @@ -1819,9 +1819,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { - soc15_grbm_select(adev, me, pipe, q, vm); + soc15_grbm_select(adev, me, pipe, q, vm, 0); } static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { @@ -1995,7 +1995,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2095,7 +2095,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2113,7 +2113,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -2144,7 +2144,8 @@ static int gfx_v9_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v9_0_compute_ring_init(adev, @@ -2158,19 +2159,19 @@ static int gfx_v9_0_sw_init(void *handle) } } - r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } - kiq = &adev->gfx.kiq; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + kiq = &adev->gfx.kiq[0]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); if (r) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); if (r) return r; @@ -2205,9 +2206,9 @@ static int gfx_v9_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); + amdgpu_gfx_kiq_fini(adev, 0); gfx_v9_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, @@ -2230,7 +2231,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) } void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, int xcc_id) { u32 data; @@ -2279,19 +2280,42 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < 
adev->gfx.config.max_sh_per_se; j++) {
-			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
 			data = gfx_v9_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	adev->gfx.config.backend_enable_mask = active_rbs;
 	adev->gfx.config.num_rbs = hweight32(active_rbs);
 }
+static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
+				uint32_t first_vmid,
+				uint32_t last_vmid)
+{
+	uint32_t data;
+	uint32_t trap_config_vmid_mask = 0;
+	int i;
+
+	/* Calculate trap config vmid mask */
+	for (i = first_vmid; i < last_vmid; i++)
+		trap_config_vmid_mask |= (1 << i);
+
+	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
+			VMID_SEL, trap_config_vmid_mask);
+	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
+			TRAP_EN, 1);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
+}
+
 #define DEFAULT_SH_MEM_BASES	(0x6000)
 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 {
@@ -2313,12 +2337,12 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	mutex_lock(&adev->srbm_mutex);
 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
-		soc15_grbm_select(adev, 0, 0, 0, i);
+		soc15_grbm_select(adev, 0, 0, 0, i, 0);
 		/* CP and shaders */
 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
 	}
-	soc15_grbm_select(adev, 0, 0, 0, 0);
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
@@ -2356,8 +2380,8 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(9, 4, 1):
 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
-		tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
-				DISABLE_BARRIER_WAITCNT, 1);
+		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
+				!READ_ONCE(adev->barrier_has_auto_waitcnt));
 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
 		break;
 	default:
@@ -2382,8 +2406,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
-		soc15_grbm_select(adev, 0, 0, 0, i);
+	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+		soc15_grbm_select(adev, 0, 0, 0, i, 0);
 		/* CP and shaders */
 		if (i == 0) {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
@@ -2405,7 +2429,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
 		}
 	}
-	soc15_grbm_select(adev, 0, 0, 0, 0);
+	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -2422,7 +2446,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if
(RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -2430,7 +2454,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { amdgpu_gfx_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -2438,7 +2462,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3133,7 +3157,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v9_0_cp_gfx_start(adev); - ring->sched.ready = true; return 0; } @@ -3145,7 +3168,7 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; } udelay(50); } @@ -3509,7 +3532,6 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; struct v9_mqd *tmp_mqd; gfx_v9_0_kiq_setting(ring); @@ -3519,20 +3541,20 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) * driver need to re-init the mqd. * check mqd->cp_hqd_pq_control since this value should not be 0 */ - tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ /* for GPU_RESET case , reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); @@ -3541,14 +3563,14 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev) && adev->in_suspend) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); } return 0; @@ -3572,24 +3594,21 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 
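/*
 * Editor's note (illustrative, not part of the patch): gfx_v9_0_kcq_init_queue()
 * in this hunk now follows the same two-branch MQD handling used by the v8 and
 * v11 paths earlier in this series: on first initialization the freshly built
 * MQD is copied out to adev->gfx.mec.mqd_backup[mqd_idx], and on any later
 * resume/reset the "else" branch restores that pristine copy and clears the
 * ring, replacing the old amdgpu_in_reset() special case. Simplified control
 * flow (condition name is a stand-in, not the driver's exact expression):
 *
 *	if (first_time_init) {
 *		... build a fresh MQD ...
 *		memcpy(mqd_backup, mqd, sizeof(*mqd));		keep a pristine copy
 *	} else {
 *		memcpy(mqd, mqd_backup, sizeof(*mqd));		restore the pristine copy
 *		ring->wptr = 0;
 *		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
 *		amdgpu_ring_clear_ring(ring);
 *	}
 */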
mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; @@ -3600,7 +3619,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) @@ -3616,7 +3635,6 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } @@ -3644,7 +3662,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = amdgpu_gfx_enable_kcq(adev); + r = amdgpu_gfx_enable_kcq(adev, 0); done: return r; } @@ -3764,7 +3782,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - amdgpu_gfx_disable_kcq(adev); + amdgpu_gfx_disable_kcq(adev, 0); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3782,11 +3800,11 @@ static int gfx_v9_0_hw_fini(void *handle) */ if (!amdgpu_in_reset(adev) && !adev->in_suspend) { mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, adev->gfx.kiq.ring.me, - adev->gfx.kiq.ring.pipe, - adev->gfx.kiq.ring.queue, 0); - gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, + adev->gfx.kiq[0].ring.pipe, + adev->gfx.kiq[0].ring.queue, 0, 0); + gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -3906,7 +3924,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) unsigned long flags; uint32_t seq, reg_val_offs = 0; uint64_t value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); @@ -4506,6 +4524,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); @@ -4571,6 +4590,13 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + gfx_v9_4_2_debug_trap_config_init(adev, + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); + else + gfx_v9_0_debug_trap_config_init(adev, + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); + return 0; } @@ -4586,7 +4612,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void 
gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4603,7 +4629,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -4614,7 +4640,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -4626,7 +4652,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -4653,7 +4679,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -4720,7 +4746,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev } } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -4731,7 +4757,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* Enable 3D CGCG/CGLS */ if (enable) { @@ -4775,7 +4801,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -4783,7 +4809,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); @@ -4827,7 +4853,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, @@ -5425,7 +5451,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; @@ -6384,7 +6410,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { }; static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, - void *inject_if) + void *inject_if, uint32_t instance_mask) { struct ras_inject_if *info = (struct ras_inject_if *)inject_if; int ret; @@ -6423,7 +6449,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, block_info.value 
= info->value; mutex_lock(&adev->grbm_idx_mutex); - ret = psp_ras_trigger_error(&adev->psp, &block_info); + ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); mutex_unlock(&adev->grbm_idx_mutex); return ret; @@ -6651,7 +6677,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6713,7 +6739,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6728,7 +6754,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -7010,7 +7036,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; @@ -7191,7 +7217,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -7224,7 +7250,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index dfe8d4841f58..f9f6edc5e558 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -27,6 +27,6 @@ extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance); + u32 instance, int xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index c67e387a97f5..bc8416afb62c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -970,29 +970,6 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); } -static int gfx_v9_4_ras_error_inject(struct amdgpu_device *adev, - void *inject_if) -{ - struct ras_inject_if *info = (struct ras_inject_if *)inject_if; - int ret; - struct ta_ras_trigger_error_input block_info = { 0 }; - 
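For reference, the hunks above and below drop the per-ASIC gfx_v9_4 / gfx_v9_4_2 copies of the inject helper while the shared gfx_v9_0 version gains an instance_mask parameter that is forwarded to the RAS TA. A minimal sketch of the resulting inject path, assuming the amdgpu/psp helpers already used in this file (the example_ name is a placeholder, not part of the patch):

static int example_gfx_ras_error_inject(struct amdgpu_device *adev,
					void *inject_if, uint32_t instance_mask)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	struct ta_ras_trigger_error_input block_info = { 0 };
	int ret;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index = info->head.sub_block_index;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	/* instance_mask lets the RAS TA target specific GC instances */
	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}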
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return -EINVAL; - - block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); - block_info.sub_block_index = info->head.sub_block_index; - block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); - block_info.address = info->address; - block_info.value = info->value; - - mutex_lock(&adev->grbm_idx_mutex); - ret = psp_ras_trigger_error(&adev->psp, &block_info); - mutex_unlock(&adev->grbm_idx_mutex); - - return ret; -} - static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; @@ -1030,7 +1007,6 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = { - .ras_error_inject = &gfx_v9_4_ras_error_inject, .query_ras_error_count = &gfx_v9_4_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, .query_ras_error_status = &gfx_v9_4_query_ras_error_status, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3a797424579c..63f6843a069e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -761,7 +761,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, for (i = first_vmid; i < last_vmid; i++) { data = 0; - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, @@ -769,15 +769,18 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_PER_VMID_CNTL), data); } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA0), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, regSPI_GDBG_TRAP_DATA1), 0); } void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev) { u32 tmp; - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); tmp = 0; tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1); @@ -1699,28 +1702,6 @@ static void gfx_v9_4_2_reset_ras_error_count(struct amdgpu_device *adev) gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL); } -static int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if) -{ - struct ras_inject_if *info = (struct ras_inject_if *)inject_if; - int ret; - struct ta_ras_trigger_error_input block_info = { 0 }; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return -EINVAL; - - block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); - block_info.sub_block_index = info->head.sub_block_index; - block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); - block_info.address = info->address; - block_info.value = info->value; - - mutex_lock(&adev->grbm_idx_mutex); - ret = psp_ras_trigger_error(&adev->psp, &block_info); - mutex_unlock(&adev->grbm_idx_mutex); - - return ret; -} - static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev) { uint32_t i, j; @@ -1935,7 +1916,7 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) u32 status = 0; struct amdgpu_vmhub *hub; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; status = 
RREG32(hub->vm_l2_pro_fault_status); /* reset page fault status */ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); @@ -1944,7 +1925,6 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) } struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { - .ras_error_inject = &gfx_v9_4_2_ras_error_inject, .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count, .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5f8500577c02..c1ee54d4c3d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -25,35 +25,504 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "vega10_enum.h" +#include "v9_structs.h" + +#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" + #include "gc/gc_9_4_3_offset.h" #include "gc/gc_9_4_3_sh_mask.h" #include "gfx_v9_4_3.h" +#include "amdgpu_xcp.h" + +MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); +#define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define GOLDEN_GB_ADDR_CONFIG 0x2a114042 + +struct amdgpu_gfx_ras gfx_v9_4_3_ras; + +static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev); +static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); + +static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, + PACKET3_SET_RESOURCES_VMID_MASK(0) | + /* vmid_mask:0* queue_type:0 (KIQ) */ + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); + amdgpu_ring_write(kiq_ring, + lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, + upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | + /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | + /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + /* num_queues: must be 1 */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(kiq_ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); +} + +static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { + .kiq_set_resources = gfx_v9_4_3_kiq_set_resources, + .kiq_map_queues = gfx_v9_4_3_kiq_map_queues, + .kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues, + .kiq_query_status = gfx_v9_4_3_kiq_query_status, + .kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, + .invalidate_tlbs_size = 2, +}; + +static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; +} + +static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) +{ + int i, num_xcc, dev_inst; 
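The gfx_v9_4_3 code programs each XCC separately: NUM_XCC() counts the set bits in adev->gfx.xcc_mask and GET_INST(GC, i) maps a logical XCC index to its device register instance, as in gfx_v9_4_3_set_kiq_pm4_funcs() above and the golden-register loop that follows. A minimal sketch of that idiom, assuming the SOC15 register macros used throughout this file (the helper name and the value written are placeholders):

static void example_program_all_xcc(struct amdgpu_device *adev, uint32_t val)
{
	int i, num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	for (i = 0; i < num_xcc; i++)
		/* GET_INST() resolves the logical XCC to its register instance */
		WREG32_SOC15(GC, GET_INST(GC, i), regGRBM_MCM_ADDR, val);
}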
+ + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + dev_inst = GET_INST(GC, i); + if (dev_inst >= 2) + WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4); + + /* Golden settings applied by driver for ASIC with rev_id 0 */ + if (adev->rev_id == 0) { + WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, + GOLDEN_GB_ADDR_CONFIG); + + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, + REDUCE_FIFO_DEPTH_BY_2, 2); + } + } +} + +static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | + (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) +{ + uint32_t scratch_reg0_offset, xcc_offset; + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + /* Use register offset which is local to XCC in the packet */ + xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); + scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); + WREG32(scratch_reg0_offset, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(ring, 3); + if (r) + return r; + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch_reg0_offset); + if (tmp == 0xDEADBEEF) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; +} + +static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + + unsigned index; + uint64_t gpu_addr; + uint32_t tmp; + long r; + + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 16, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) + goto err1; + + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + goto err2; + } + + tmp = adev->wb.wb[index]; + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; + +err2: + amdgpu_ib_free(adev, &ib, 
NULL); + dma_fence_put(f); +err1: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/* This value might differs per partition */ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; } -static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, - u32 se_num, - u32 sh_num, - u32 instance) +static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev) +{ + amdgpu_ucode_release(&adev->gfx.pfp_fw); + amdgpu_ucode_release(&adev->gfx.me_fw); + amdgpu_ucode_release(&adev->gfx.ce_fw); + amdgpu_ucode_release(&adev->gfx.rlc_fw); + amdgpu_ucode_release(&adev->gfx.mec_fw); + amdgpu_ucode_release(&adev->gfx.mec2_fw); + + kfree(adev->gfx.rlc.register_list_format); +} + +static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + uint16_t version_major; + uint16_t version_minor; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); + if (err) + goto out; + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); +out: + if (err) + amdgpu_ucode_release(&adev->gfx.rlc_fw); + + return err; +} + +static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev) +{ + return true; +} + +static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev) +{ + if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; +} + +static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); + if (err) + goto out; + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); + amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); + + adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; + adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; + + gfx_v9_4_3_check_if_need_gfxoff(adev); + +out: + if (err) + amdgpu_ucode_release(&adev->gfx.mec_fw); + return err; +} + +static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + chip_name = "gc_9_4_3"; + + r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + +static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); +} + +static int 
gfx_v9_4_3_mec_init(struct amdgpu_device *adev) +{ + int r, i, num_xcc; + u32 *hpd; + const __le32 *fw_data; + unsigned fw_size; + u32 *fw; + size_t mec_hpd_size; + + const struct gfx_firmware_header_v1_0 *mec_hdr; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, + AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = + adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE; + if (mec_hpd_size) { + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v9_4_3_mec_fini(adev); + return r; + } + + if (amdgpu_emu_mode == 1) { + for (i = 0; i < mec_hpd_size / 4; i++) { + memset((void *)(hpd + i), 0, 4); + if (i % 50 == 0) + msleep(1); + } + } else { + memset(hpd, 0, mec_hpd_size); + } + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + } + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); + if (r) { + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + gfx_v9_4_3_mec_fini(adev); + return r; + } + + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + return 0; +} + +static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance, int xcc_id) { u32 data; @@ -76,24 +545,24 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev, else data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); - WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data); + WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); } -static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address) { - WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | (SQ_IND_INDEX__FORCE_READ_MASK)); - return RREG32_SOC15(GC, 0, regSQ_IND_DATA); + return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } -static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, +static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t regno, uint32_t num, uint32_t *out) { - WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX, + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (regno << SQ_IND_INDEX__INDEX__SHIFT) | @@ -101,53 +570,481 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, (SQ_IND_INDEX__FORCE_READ_MASK) | 
(SQ_IND_INDEX__AUTO_INCR_MASK)); while (num--) - *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); + *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); } static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev, - uint32_t simd, uint32_t wave, + uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); - dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); -} - -static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE); +} + +static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, 0, + wave_read_regs(adev, xcc_id, simd, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) { - wave_read_regs(adev, simd, wave, thread, + wave_read_regs(adev, xcc_id, simd, wave, thread, start + SQIND_WAVE_VGPRS_OFFSET, 
size, dst); } static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) +{ + soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); +} + + +static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, + int num_xccs_per_xcp) +{ + int ret; + + ret = psp_spatial_partition(&adev->psp, NUM_XCC(adev->gfx.xcc_mask) / + num_xccs_per_xcp); + if (ret) + return ret; + + adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; + + return ret; +} + +static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) +{ + int xcc; + + xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); + if (!xcc) { + dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); + return -EINVAL; + } + + return xcc - 1; +} + +static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh, + .read_wave_data = &gfx_v9_4_3_read_wave_data, + .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, + .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, + .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, + .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, + .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, +}; + +static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; + adev->gfx.ras = &gfx_v9_4_3_ras; + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG); + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.num_banks = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_BANKS); + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + PIPE_INTERLEAVE_SIZE)); + + return 0; +} + +static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int xcc_id, int mec, int pipe, int queue) +{ + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + unsigned int hw_prio; + uint32_t xcc_doorbell_start; + + ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + + ring_id]; + + /* mec0 is me1 */ + ring->xcc_id = xcc_id; + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + 
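Compute rings for all XCCs live in one flat compute_ring[] array, and each XCC gets its own doorbell window. A minimal sketch of the index arithmetic used just above and in the doorbell setup that follows (plain C, placeholder names; doorbell slots are 64-bit in amdgpu while ring->doorbell_index counts dwords, hence the << 1):

static inline int example_flat_ring_index(int xcc_id, int num_compute_rings,
					  int ring_id)
{
	/* rings of XCC0 first, then XCC1, ... */
	return xcc_id * num_compute_rings + ring_id;
}

static inline unsigned int example_ring_doorbell(unsigned int mec_ring0,
						 unsigned int xcc_doorbell_range,
						 int xcc_id, int ring_id)
{
	/* per-XCC doorbell window, converted from qword to dword index */
	return (mec_ring0 + xcc_id * xcc_doorbell_range + ring_id) << 1;
}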
xcc_doorbell_start = adev->doorbell_index.mec_ring0 + + xcc_id * adev->doorbell_index.xcc_doorbell_range; + ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id + xcc_id * adev->gfx.num_compute_rings) * + GFX9_MEC_HPD_SIZE; + ring->vm_hub = AMDGPU_GFXHUB(xcc_id); + sprintf(ring->name, "comp_%d.%d.%d.%d", + ring->xcc_id, ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? + AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +} + +static int gfx_v9_4_3_sw_init(void *handle) +{ + int i, j, k, r, ring_id, xcc_id, num_xcc; + struct amdgpu_kiq *kiq; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + r = adev->gfx.rlc.funcs->init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v9_4_3_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the compute queues - allocate horizontally across pipes */ + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; + k++) { + if (!amdgpu_gfx_is_mec_queue_enabled( + adev, xcc_id, i, k, j)) + continue; + + r = gfx_v9_4_3_compute_ring_init(adev, + ring_id, + xcc_id, + i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq[xcc_id]; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); + if (r) + return r; + + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = amdgpu_gfx_mqd_sw_init(adev, + sizeof(struct v9_mqd_allocation), xcc_id); + if (r) + return r; + } + + r = gfx_v9_4_3_gpu_early_init(adev); + if (r) + return r; + + r = amdgpu_gfx_sysfs_init(adev); + if (r) + return r; + + return amdgpu_gfx_ras_sw_init(adev); +} + +static int gfx_v9_4_3_sw_fini(void *handle) { - soc15_grbm_select(adev, me, pipe, q, vm); + int i, num_xcc; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + for (i = 0; i < num_xcc; i++) { + amdgpu_gfx_mqd_sw_fini(adev, i); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); + amdgpu_gfx_kiq_fini(adev, i); + } + + 
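gfx_v9_4_3_sw_init() above walks MEC, then queue, then pipe for every XCC so that queues are spread horizontally across pipes. A standalone sketch of that enumeration order, assuming 4 XCCs purely for illustration and omitting the amdgpu_gfx_is_mec_queue_enabled() filter the real loop applies (counts match the values sw_init sets: 2 MECs, 4 pipes per MEC, 8 queues per pipe):

#include <stdio.h>

int main(void)
{
	const int num_xcc = 4, num_mec = 2, num_pipe = 4, num_queue = 8;
	int xcc, mec, queue, pipe, ring_id;

	for (xcc = 0; xcc < num_xcc; xcc++) {
		ring_id = 0;
		for (mec = 0; mec < num_mec; mec++)
			for (queue = 0; queue < num_queue; queue++)
				for (pipe = 0; pipe < num_pipe; pipe++)
					printf("xcc%d ring%d -> mec%d pipe%d queue%d\n",
					       xcc, ring_id++, mec, pipe, queue);
	}
	return 0;
}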
gfx_v9_4_3_mec_fini(adev); + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + gfx_v9_4_3_free_microcode(adev); + amdgpu_gfx_sysfs_fini(adev); + + return 0; +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev, + int xcc_id) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_64 | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + + mutex_lock(&adev->srbm_mutex); + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); + /* CP and shaders */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases); + } + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + acccess. These should be enabled by FW for target VMIDs. */ + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0); + } +} + +static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
+ */ + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0); + } +} + +static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev, + int xcc_id) +{ + u32 tmp; + int i; + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { + soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); + /* CP and shaders */ + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_CONFIG, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!adev->gmc.noretry); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_CONFIG, tmp); + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> + 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> + 48)); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), + regSH_MEM_BASES, tmp); + } + } + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0)); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id); + gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id); +} + +static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + + gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = + RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2); + + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_constants_init(adev, i); +} + +static void +gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1); +} + +static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id) +{ + /* + * Rlc save restore list is workable since v2_1. + * And it's needed by gfxoff feature. 
+ */ + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id); +} + +static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); +} + +static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t tmp = 0; + int num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (num_xcc) { + /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ + case 1: + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8); + break; + case 2: + case 4: + case 6: + case 8: + tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); + tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp); + + break; + default: + break; + } } static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) @@ -155,36 +1052,37 @@ static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) uint32_t rlc_setting; /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32_SOC15(GC, 0, regRLC_CNTL); + rlc_setting = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL); if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) return false; return true; } -static void gfx_v9_4_3_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_4_3_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); /* wait for RLC_SAFE_MODE */ for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) break; udelay(1); } } -static void gfx_v9_4_3_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, + int xcc_id) { uint32_t data; data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) @@ -196,7 +1094,8 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) +static void gfx_v9_4_3_xcc_wait_for_rlc_serdes(struct amdgpu_device *adev, + int xcc_id) { u32 i, j, k; u32 mask; @@ -204,15 +1103,17 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff); + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, + xcc_id); for (k = 0; k < adev->usec_timeout; k++) { - if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0) + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_CU_MASTER_BUSY) == 0) break; udelay(1); } if (k == adev->usec_timeout) { - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, + 0xffffffff, + 0xffffffff, xcc_id); 
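gfx_v9_4_3_xcc_set_safe_mode() in the hunk above implements a request/ack handshake with the RLC: the driver writes CMD plus a MESSAGE code to RLC_SAFE_MODE on the chosen XCC, then polls until the RLC clears CMD. A minimal sketch of that handshake, assuming the register macros used in this file (the real helper returns void; the error return here is only for illustration):

static int example_xcc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data = RLC_SAFE_MODE__CMD_MASK |
			(1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	unsigned int i;

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		/* RLC clears the CMD field once safe mode is entered */
		if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
						regRLC_SAFE_MODE),
				   RLC_SAFE_MODE, CMD))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}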
mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -220,7 +1121,8 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -228,79 +1130,108 @@ static void gfx_v9_4_3_wait_for_rlc_serdes(struct amdgpu_device *adev) RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; for (k = 0; k < adev->usec_timeout; k++) { - if ((RREG32_SOC15(GC, 0, regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) + if ((RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) break; udelay(1); } } -static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, - bool enable) +static void gfx_v9_4_3_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable, int xcc_id) { u32 tmp; /* These interrupts should be enabled to drive DS clock */ - tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0); + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); - if (adev->gfx.num_gfx_rings) - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); - WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp); +} + +static void gfx_v9_4_3_xcc_rlc_stop(struct amdgpu_device *adev, int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, + RLC_ENABLE_F32, 0); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); + gfx_v9_4_3_xcc_wait_for_rlc_serdes(adev, xcc_id); } static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { - WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); - gfx_v9_4_3_enable_gui_idle_interrupt(adev, false); - gfx_v9_4_3_wait_for_rlc_serdes(adev); + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_rlc_stop(adev, i); } -static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) +static void gfx_v9_4_3_xcc_rlc_reset(struct amdgpu_device *adev, int xcc_id) { - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET, + SOFT_RESET_RLC, 1); udelay(50); - WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), GRBM_SOFT_RESET, + SOFT_RESET_RLC, 0); udelay(50); } -static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) +static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) { -#ifdef AMDGPU_RLC_DEBUG_RETRY - u32 rlc_ucode_ver; -#endif + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_rlc_reset(adev, i); +} - WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); +static void gfx_v9_4_3_xcc_rlc_start(struct amdgpu_device *adev, int xcc_id) +{ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, + RLC_ENABLE_F32, 1); udelay(50); /* carrizo do enable cp interrupt after cp inited */ if (!(adev->flags & AMD_IS_APU)) { - gfx_v9_4_3_enable_gui_idle_interrupt(adev, 
true); + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); udelay(50); } +} +static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) +{ #ifdef AMDGPU_RLC_DEBUG_RETRY - /* RLC_GPM_GENERAL_6 : RLC Ucode version */ - rlc_ucode_ver = RREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_6); - if (rlc_ucode_ver == 0x108) { - dev_info(adev->dev, - "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", - rlc_ucode_ver, adev->gfx.rlc_fw_version); - /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, - * default is 0x9C4 to create a 100us interval */ - WREG32_SOC15(GC, 0, regRLC_GPM_TIMER_INT_3, 0x9C4); - /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr - * to disable the page fault retry interrupts, default is - * 0x100 (256) */ - WREG32_SOC15(GC, 0, regRLC_GPM_GENERAL_12, 0x100); - } + u32 rlc_ucode_ver; +#endif + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + gfx_v9_4_3_xcc_rlc_start(adev, i); +#ifdef AMDGPU_RLC_DEBUG_RETRY + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_6); + if (rlc_ucode_ver == 0x108) { + dev_info(adev->dev, + "Using rlc debug ucode. regRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_TIMER_INT_3, 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_GENERAL_12, 0x100); + } #endif + } } -static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) +static int gfx_v9_4_3_xcc_rlc_load_microcode(struct amdgpu_device *adev, + int xcc_id) { const struct rlc_firmware_header_v2_0 *hdr; const __le32 *fw_data; @@ -316,49 +1247,65 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, RLCG_UCODE_LOADING_START_ADDRESS); for (i = 0; i < fw_size; i++) { if (amdgpu_emu_mode == 1 && i % 100 == 0) { dev_info(adev->dev, "Write RLC ucode data %u DWs\n", i); msleep(1); } - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); } - WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); return 0; } -static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +static int gfx_v9_4_3_xcc_rlc_resume(struct amdgpu_device *adev, int xcc_id) { int r; - adev->gfx.rlc.funcs->stop(adev); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_xcc_rlc_stop(adev, xcc_id); + /* legacy rlc firmware loading */ + r = gfx_v9_4_3_xcc_rlc_load_microcode(adev, xcc_id); + if (r) + return r; + gfx_v9_4_3_xcc_rlc_start(adev, xcc_id); + } + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); /* disable CG */ - WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); + gfx_v9_4_3_xcc_init_pg(adev, xcc_id); + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); - /* TODO: revisit pg function */ - /* gfx_v9_4_3_init_pg(adev);*/ + return 0; 
+} - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy rlc firmware loading */ - r = gfx_v9_4_3_rlc_load_microcode(adev); +static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) +{ + int r, i, num_xcc; + + if (amdgpu_sriov_vf(adev)) + return 0; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + r = gfx_v9_4_3_xcc_rlc_resume(adev, i); if (r) return r; } - adev->gfx.rlc.funcs->start(adev); - return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, + unsigned vmid) { u32 reg, data; - reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); else @@ -368,9 +1315,9 @@ static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data); } static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = { @@ -382,7 +1329,7 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, uint32_t offset, struct soc15_reg_rlcg *entries, int arr_size) { - int i; + int i, inst; uint32_t reg; if (!entries) @@ -392,7 +1339,12 @@ static bool gfx_v9_4_3_check_rlcg_range(struct amdgpu_device *adev, const struct soc15_reg_rlcg *entry; entry = &entries[i]; - reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + inst = adev->ip_map.logical_to_dev_inst ? 
+ adev->ip_map.logical_to_dev_inst( + adev, entry->hwip, entry->instance) : + entry->instance; + reg = adev->reg_offset[entry->hwip][inst][entry->segment] + + entry->reg; if (offset == reg) return true; } @@ -407,19 +1359,1025 @@ static bool gfx_v9_4_3_is_rlcg_access_range(struct amdgpu_device *adev, u32 offs ARRAY_SIZE(rlcg_access_gc_9_4_3)); } -const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { - .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, - .select_se_sh = &gfx_v9_4_3_select_se_sh, - .read_wave_data = &gfx_v9_4_3_read_wave_data, - .read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs, - .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, -}; +static void gfx_v9_4_3_xcc_cp_compute_enable(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + if (enable) { + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, 0); + } else { + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MEC_CNTL, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + adev->gfx.kiq[xcc_id].ring.sched.ready = false; + } + udelay(50); +} + +static int gfx_v9_4_3_xcc_cp_compute_load_microcode(struct amdgpu_device *adev, + int xcc_id) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + u32 mec_ucode_addr_offset; + u32 mec_ucode_data_offset; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + tmp = 0; + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); + + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, + adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + mec_ucode_addr_offset = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_ADDR); + mec_ucode_data_offset = + SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_MEC_ME1_UCODE_DATA); + + /* MEC1 */ + WREG32(mec_ucode_addr_offset, mec_hdr->jt_offset); + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32(mec_ucode_data_offset, + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + WREG32(mec_ucode_addr_offset, adev->gfx.mec_fw_version); + /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ + + return 0; +} + +/* KIQ functions */ +static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); +} + +static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + mqd->cp_hqd_queue_priority = + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; + } + } +} + +static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + mqd->dynamic_cu_mask_addr_lo = + lower_32_bits(ring->mqd_gpu_addr + + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); + mqd->dynamic_cu_mask_addr_hi = + upper_32_bits(ring->mqd_gpu_addr + + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? 
*/ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* set static priority for a queue/ring */ + gfx_v9_4_3_mqd_set_priority(ring, mqd); + mqd->cp_hqd_quantum = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_QUANTUM); + + /* map_queues packet doesn't need activate the queue, + * so only kiq need set this field. 
+ */ + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring, + int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int j; + + /* disable wptr polling */ + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0); + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_LOWER, + ((adev->doorbell_index.kiq + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); + WREG32_SOC15( + GC, GET_INST(GC, xcc_id), + regCP_MEC_DOORBELL_RANGE_UPPER, + ((adev->doorbell_index.userqueue_end + + xcc_id * adev->doorbell_index.xcc_doorbell_range) * + 2) << 2); + } + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar 
to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring, + int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + int j; + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); + + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + + if (j == AMDGPU_MAX_USEC_TIMEOUT) { + DRM_DEBUG("%s dequeue request failed.\n", ring->name); + + /* Manual disable if dequeue request times out */ + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0); + } + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, + 0); + } + + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 0); + + return 0; +} + +static int gfx_v9_4_3_xcc_kiq_init_queue(struct amdgpu_ring *ring, int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + struct v9_mqd *tmp_mqd; + + gfx_v9_4_3_xcc_kiq_setting(ring, xcc_id); + + /* GPU could be in bad state during probe, driver trigger the reset + * after load the SMU, in this case , the mqd is not be initialized. + * driver need to re-init the mqd. 
+ * check mqd->cp_hqd_pq_control since this value should not be 0 + */ + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[xcc_id].mqd_backup; + if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { + /* for GPU_RESET case , reset MQD to a clean status */ + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(struct v9_mqd_allocation)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); + ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_mqd_init(ring, xcc_id); + gfx_v9_4_3_xcc_kiq_init_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.kiq[xcc_id].mqd_backup) + memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); + } + + return 0; +} + +static int gfx_v9_4_3_xcc_kcq_init_queue(struct amdgpu_ring *ring, int xcc_id) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + struct v9_mqd *tmp_mqd; + + /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control + * is not be initialized before + */ + tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; + + if (!tmp_mqd->cp_hqd_pq_control || + (!amdgpu_in_reset(adev) && !adev->in_suspend)) { + memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); + ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; + ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_mqd_init(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); + } else { + /* restore MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); + /* reset ring buffer */ + ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring; + int j; + + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring[j + xcc_id * adev->gfx.num_compute_rings]; + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, + ring->pipe, + ring->queue, 0, GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_q_fini_register(ring, xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } + } + + return 0; +} + +static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring; + int r; + + ring = 
&adev->gfx.kiq[xcc_id].ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); + return r; + } + + gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + return 0; +} + +static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev, xcc_id); +done: + return r; +} + +static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) +{ + struct amdgpu_ring *ring; + int r, j; + + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + gfx_v9_4_3_xcc_disable_gpa_mode(adev, xcc_id); + + r = gfx_v9_4_3_xcc_cp_compute_load_microcode(adev, xcc_id); + if (r) + return r; + } + + /* set the virtual and physical id based on partition_mode */ + gfx_v9_4_3_xcc_program_xcc_id(adev, xcc_id); + + r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); + if (r) + return r; + + r = gfx_v9_4_3_xcc_kcq_resume(adev, xcc_id); + if (r) + return r; + + for (j = 0; j < adev->gfx.num_compute_rings; j++) { + ring = &adev->gfx.compute_ring + [j + xcc_id * adev->gfx.num_compute_rings]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + gfx_v9_4_3_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); + + return 0; +} + +static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) +{ + int r = 0, i, num_xcc; + + if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + AMDGPU_XCP_FL_NONE) == + AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) + r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, + amdgpu_user_partt_mode); + + if (r) + return r; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + r = gfx_v9_4_3_xcc_cp_resume(adev, i); + if (r) + return r; + } + + return 0; +} + +static void gfx_v9_4_3_xcc_cp_enable(struct amdgpu_device *adev, bool enable, + int xcc_id) +{ + gfx_v9_4_3_xcc_cp_compute_enable(adev, enable, xcc_id); +} + +static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id) +{ + if (amdgpu_gfx_disable_kcq(adev, xcc_id)) + DRM_ERROR("XCD %d KCQ disable failed\n", xcc_id); + + /* Use deinitialize sequence from CAIL when unbinding device + * from driver, otherwise KIQ is hanging when binding back + */ + if (!amdgpu_in_reset(adev) && !adev->in_suspend) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, adev->gfx.kiq[xcc_id].ring.me, + adev->gfx.kiq[xcc_id].ring.pipe, + adev->gfx.kiq[xcc_id].ring.queue, 0, + GET_INST(GC, xcc_id)); + gfx_v9_4_3_xcc_q_fini_register(&adev->gfx.kiq[xcc_id].ring, + xcc_id); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); + mutex_unlock(&adev->srbm_mutex); + } + + gfx_v9_4_3_xcc_kcq_fini_register(adev, xcc_id); + gfx_v9_4_3_xcc_cp_enable(adev, false, xcc_id); +} + +static int 
gfx_v9_4_3_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfx_v9_4_3_init_golden_registers(adev); + + gfx_v9_4_3_constants_init(adev); + + r = adev->gfx.rlc.funcs->resume(adev); + if (r) + return r; + + r = gfx_v9_4_3_cp_resume(adev); + if (r) + return r; + + return r; +} + +static int gfx_v9_4_3_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, num_xcc; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + gfx_v9_4_3_xcc_fini(adev, i); + } + + return 0; +} + +static int gfx_v9_4_3_suspend(void *handle) +{ + return gfx_v9_4_3_hw_fini(handle); +} + +static int gfx_v9_4_3_resume(void *handle) +{ + return gfx_v9_4_3_hw_init(handle); +} + +static bool gfx_v9_4_3_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), regGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) + return false; + } + return true; +} + +static int gfx_v9_4_3_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v9_4_3_is_idle(handle)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v9_4_3_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | + GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | + GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | + GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS2); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + + + if (grbm_soft_reset) { + /* stop the rlc */ + adev->gfx.rlc.funcs->stop(adev); + + /* Disable MEC parsing/prefetching */ + gfx_v9_4_3_xcc_cp_compute_enable(adev, false, 0); + + if (grbm_soft_reset) { + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_SOFT_RESET); + } + + /* Wait a little for things to settle down */ + udelay(50); + } + return 0; +} + +static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t 
gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + struct amdgpu_device *adev = ring->adev; + + /* GDS Base */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_BASE) + 2 * vmid, + gds_base); + + /* GDS Size */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_VMID0_SIZE) + 2 * vmid, + gds_size); + + /* GWS */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_GWS_VMID0) + vmid, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v9_4_3_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regGDS_OA_VMID0) + vmid, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static int gfx_v9_4_3_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); + gfx_v9_4_3_set_kiq_pm4_funcs(adev); + gfx_v9_4_3_set_ring_funcs(adev); + gfx_v9_4_3_set_irq_funcs(adev); + gfx_v9_4_3_set_gds_init(adev); + gfx_v9_4_3_set_rlc_funcs(adev); + + return gfx_v9_4_3_init_microcode(adev); +} + +static int gfx_v9_4_3_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL); + + if (enable) + data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + else + data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data); +} + +static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) + return; + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE); + + if (enable) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REP_FGCG_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regRLC_CGTT_MGCG_OVERRIDE, data); +} + +static void +gfx_v9_4_3_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t data, def; + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 
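	/* Editor's descriptive note on the code just above: the def/data pair
	 * implements a read-modify-write that skips the WREG32 entirely when
	 * no override bit actually changes, avoiding redundant register traffic.
	 */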
+ + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); + + /* 2 - disable MGLS in RLC */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_MEM_SLP_CNTL, data); + } + + /* 3 - disable MGLS in CP */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEM_SLP_CNTL, data); + } + } + +} + +static void +gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + uint32_t def, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); + + data = (0x36 + << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); + } + +} 
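All of the FGCG/MGCG/CGCG helpers above share the same read-modify-write-if-changed idiom: read the override register into both def and data, clear or set the relevant override mask in data depending on enable, then write the register back only when the value actually changed. Below is a minimal, self-contained user-space sketch of that idiom; read_reg(), write_reg(), fake_reg and update_override() are illustrative stand-ins for the RREG32_SOC15/WREG32_SOC15 accessors and the per-feature override masks, not driver symbols.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the SOC15 register accessors used in the patch. */
static uint32_t fake_reg;
static uint32_t read_reg(void)          { return fake_reg; }
static void     write_reg(uint32_t val) { fake_reg = val; }

/* Clear @mask to enable the feature, set it to disable it, and only
 * touch the register when the value actually changes. */
static void update_override(uint32_t mask, bool enable)
{
	uint32_t def, data;

	def = data = read_reg();

	if (enable)
		data &= ~mask;
	else
		data |= mask;

	if (def != data)
		write_reg(data);
}

int main(void)
{
	fake_reg = 0xff;
	update_override(0x0f, true);                      /* clears the low four bits */
	printf("reg = 0x%02x\n", (unsigned int)fake_reg); /* prints: reg = 0xf0 */
	return 0;
}

In the driver the same shape appears with regRLC_CGTT_MGCG_OVERRIDE, regRLC_MEM_SLP_CNTL and regCP_MEM_SLP_CNTL, with the mask chosen per clock-gating feature.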
-const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { +static int gfx_v9_4_3_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable, int xcc_id) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + + if (enable) { + /* FGCG */ + gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id); + + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable, + xcc_id); + /* === CGCG + CGLS === */ + gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable, + xcc_id); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(adev, enable, + xcc_id); + /* === MGCG + MGLS === */ + gfx_v9_4_3_xcc_update_medium_grain_clock_gating(adev, enable, + xcc_id); + + /* FGCG */ + gfx_v9_4_3_xcc_update_sram_fgcg(adev, enable, xcc_id); + gfx_v9_4_3_xcc_update_repeater_fgcg(adev, enable, xcc_id); + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); + + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlc_enabled = gfx_v9_4_3_is_rlc_enabled, - .set_safe_mode = gfx_v9_4_3_set_safe_mode, - .unset_safe_mode = gfx_v9_4_3_unset_safe_mode, + .set_safe_mode = gfx_v9_4_3_xcc_set_safe_mode, + .unset_safe_mode = gfx_v9_4_3_xcc_unset_safe_mode, .init = gfx_v9_4_3_rlc_init, .resume = gfx_v9_4_3_rlc_resume, .stop = gfx_v9_4_3_rlc_stop, @@ -428,3 +2386,1982 @@ const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .update_spm_vmid = gfx_v9_4_3_update_spm_vmid, .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, }; + +static int gfx_v9_4_3_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static int gfx_v9_4_3_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, num_xcc; + + if (amdgpu_sriov_vf(adev)) + return 0; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_update_gfx_clock_gating( + adev, state == AMD_CG_STATE_GATE, i); + break; + default: + break; + } + return 0; +} + +static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE)); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL)); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_MEM_SLP_CNTL)); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCP_MEM_SLP_CNTL)); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; +} + +static void gfx_v9_4_3_ring_emit_hdp_flush(struct 
amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask, reg_mem_engine; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v9_4_3_wait_reg_mem(ring, reg_mem_engine, 0, 1, + adev->nbio.funcs->get_hdp_flush_req_offset(adev), + adev->nbio.funcs->get_hdp_flush_done_offset(adev), + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v9_4_3_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v9_4_3_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_4_3_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v9_4_3_wait_reg_mem(ring, usepfp, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + seq, 0xffffffff, 4); +} + +static void gfx_v9_4_3_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static u64 gfx_v9_4_3_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ +} + +static u64 gfx_v9_4_3_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v9_4_3_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx9 now */ + } +} + +static void gfx_v9_4_3_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t reg_val_offs) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + reg_val_offs * 4)); +} + +static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | 
WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_4_3_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v9_4_3_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void gfx_v9_4_3_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + +static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + struct amdgpu_device *adev, int me, int pipe, + enum amdgpu_interrupt_state state, int xcc_id) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regCP_ME1_PIPE3_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + for (i = 0; i < num_xcc; i++) + WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + switch (type) { + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 0, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 1, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 2, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 1, 3, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 0, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 1, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 2, state, i); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( + adev, 2, 3, state, i); + break; + default: + break; + } + } + + return 0; +} + +static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i, xcc_id; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return -EINVAL; + + switch (me_id) { + case 0: + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. 
+ */ + + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static void gfx_v9_4_3_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i, xcc_id; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + xcc_id = gfx_v9_4_3_ih_to_xcc_inst(adev, entry->node_id); + + if (xcc_id == -EINVAL) + return; + + switch (me_id) { + case 0: + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring + [i + + xcc_id * adev->gfx.num_compute_rings]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + +static int gfx_v9_4_3_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + +static int gfx_v9_4_3_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v9_4_3_fault(adev, entry); + return 0; +} + +static void gfx_v9_4_3_emit_mem_sync(struct amdgpu_ring *ring) +{ + const unsigned int cp_coher_cntl = + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | + PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); + + /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ +} + +static void gfx_v9_4_3_emit_wave_limit_cs(struct amdgpu_ring *ring, + uint32_t pipe, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + uint32_t wcl_cs_reg; + + /* regSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ + val = enable ? 0x1 : 0x7f; + + switch (pipe) { + case 0: + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS0); + break; + case 1: + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS1); + break; + case 2: + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS2); + break; + case 3: + wcl_cs_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_CS3); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + + amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); + +} +static void gfx_v9_4_3_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + int i; + + /* regSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 
0x1f : 0x07ffffff; + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regSPI_WCL_PIPE_PERCENT_GFX), + val); + + /* Restrict waves for normal/low priority compute queues as well + * to get best QoS for high priority compute jobs. + * + * amdgpu controls only 1st ME(0-3 CS pipes). + */ + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + if (i != ring->pipe) + gfx_v9_4_3_emit_wave_limit_cs(ring, i, enable); + + } +} + +enum amdgpu_gfx_cp_ras_mem_id { + AMDGPU_GFX_CP_MEM1 = 1, + AMDGPU_GFX_CP_MEM2, + AMDGPU_GFX_CP_MEM3, + AMDGPU_GFX_CP_MEM4, + AMDGPU_GFX_CP_MEM5, +}; + +enum amdgpu_gfx_gcea_ras_mem_id { + AMDGPU_GFX_GCEA_IOWR_CMDMEM = 4, + AMDGPU_GFX_GCEA_IORD_CMDMEM, + AMDGPU_GFX_GCEA_GMIWR_CMDMEM, + AMDGPU_GFX_GCEA_GMIRD_CMDMEM, + AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, + AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, + AMDGPU_GFX_GCEA_MAM_DMEM0, + AMDGPU_GFX_GCEA_MAM_DMEM1, + AMDGPU_GFX_GCEA_MAM_DMEM2, + AMDGPU_GFX_GCEA_MAM_DMEM3, + AMDGPU_GFX_GCEA_MAM_AMEM0, + AMDGPU_GFX_GCEA_MAM_AMEM1, + AMDGPU_GFX_GCEA_MAM_AMEM2, + AMDGPU_GFX_GCEA_MAM_AMEM3, + AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, + AMDGPU_GFX_GCEA_WRET_TAGMEM, + AMDGPU_GFX_GCEA_RRET_TAGMEM, + AMDGPU_GFX_GCEA_IOWR_DATAMEM, + AMDGPU_GFX_GCEA_GMIWR_DATAMEM, + AMDGPU_GFX_GCEA_DRAM_DATAMEM, +}; + +enum amdgpu_gfx_gc_cane_ras_mem_id { + AMDGPU_GFX_GC_CANE_MEM0 = 0, +}; + +enum amdgpu_gfx_gcutcl2_ras_mem_id { + AMDGPU_GFX_GCUTCL2_MEM2P512X95 = 160, +}; + +enum amdgpu_gfx_gds_ras_mem_id { + AMDGPU_GFX_GDS_MEM0 = 0, +}; + +enum amdgpu_gfx_lds_ras_mem_id { + AMDGPU_GFX_LDS_BANK0 = 0, + AMDGPU_GFX_LDS_BANK1, + AMDGPU_GFX_LDS_BANK2, + AMDGPU_GFX_LDS_BANK3, + AMDGPU_GFX_LDS_BANK4, + AMDGPU_GFX_LDS_BANK5, + AMDGPU_GFX_LDS_BANK6, + AMDGPU_GFX_LDS_BANK7, + AMDGPU_GFX_LDS_BANK8, + AMDGPU_GFX_LDS_BANK9, + AMDGPU_GFX_LDS_BANK10, + AMDGPU_GFX_LDS_BANK11, + AMDGPU_GFX_LDS_BANK12, + AMDGPU_GFX_LDS_BANK13, + AMDGPU_GFX_LDS_BANK14, + AMDGPU_GFX_LDS_BANK15, + AMDGPU_GFX_LDS_BANK16, + AMDGPU_GFX_LDS_BANK17, + AMDGPU_GFX_LDS_BANK18, + AMDGPU_GFX_LDS_BANK19, + AMDGPU_GFX_LDS_BANK20, + AMDGPU_GFX_LDS_BANK21, + AMDGPU_GFX_LDS_BANK22, + AMDGPU_GFX_LDS_BANK23, + AMDGPU_GFX_LDS_BANK24, + AMDGPU_GFX_LDS_BANK25, + AMDGPU_GFX_LDS_BANK26, + AMDGPU_GFX_LDS_BANK27, + AMDGPU_GFX_LDS_BANK28, + AMDGPU_GFX_LDS_BANK29, + AMDGPU_GFX_LDS_BANK30, + AMDGPU_GFX_LDS_BANK31, + AMDGPU_GFX_LDS_SP_BUFFER_A, + AMDGPU_GFX_LDS_SP_BUFFER_B, +}; + +enum amdgpu_gfx_rlc_ras_mem_id { + AMDGPU_GFX_RLC_GPMF32 = 1, + AMDGPU_GFX_RLC_RLCVF32, + AMDGPU_GFX_RLC_SCRATCH, + AMDGPU_GFX_RLC_SRM_ARAM, + AMDGPU_GFX_RLC_SRM_DRAM, + AMDGPU_GFX_RLC_TCTAG, + AMDGPU_GFX_RLC_SPM_SE, + AMDGPU_GFX_RLC_SPM_GRBMT, +}; + +enum amdgpu_gfx_sp_ras_mem_id { + AMDGPU_GFX_SP_SIMDID0 = 0, +}; + +enum amdgpu_gfx_spi_ras_mem_id { + AMDGPU_GFX_SPI_MEM0 = 0, + AMDGPU_GFX_SPI_MEM1, + AMDGPU_GFX_SPI_MEM2, + AMDGPU_GFX_SPI_MEM3, +}; + +enum amdgpu_gfx_sqc_ras_mem_id { + AMDGPU_GFX_SQC_INST_CACHE_A = 100, + AMDGPU_GFX_SQC_INST_CACHE_B = 101, + AMDGPU_GFX_SQC_INST_CACHE_TAG_A = 102, + AMDGPU_GFX_SQC_INST_CACHE_TAG_B = 103, + AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A = 104, + AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B = 105, + AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A = 106, + AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B = 107, + AMDGPU_GFX_SQC_DATA_CACHE_A = 200, + AMDGPU_GFX_SQC_DATA_CACHE_B = 201, + AMDGPU_GFX_SQC_DATA_CACHE_TAG_A = 202, + AMDGPU_GFX_SQC_DATA_CACHE_TAG_B = 203, + AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A = 204, + AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B = 205, + AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A = 206, + 
AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B = 207, + AMDGPU_GFX_SQC_DIRTY_BIT_A = 208, + AMDGPU_GFX_SQC_DIRTY_BIT_B = 209, + AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0 = 210, + AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1 = 211, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A = 212, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B = 213, + AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE = 108, +}; + +enum amdgpu_gfx_sq_ras_mem_id { + AMDGPU_GFX_SQ_SGPR_MEM0 = 0, + AMDGPU_GFX_SQ_SGPR_MEM1, + AMDGPU_GFX_SQ_SGPR_MEM2, + AMDGPU_GFX_SQ_SGPR_MEM3, +}; + +enum amdgpu_gfx_ta_ras_mem_id { + AMDGPU_GFX_TA_FS_AFIFO_RAM_LO = 1, + AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, + AMDGPU_GFX_TA_FS_CFIFO_RAM, + AMDGPU_GFX_TA_FSX_LFIFO, + AMDGPU_GFX_TA_FS_DFIFO_RAM, +}; + +enum amdgpu_gfx_tcc_ras_mem_id { + AMDGPU_GFX_TCC_MEM1 = 1, +}; + +enum amdgpu_gfx_tca_ras_mem_id { + AMDGPU_GFX_TCA_MEM1 = 1, +}; + +enum amdgpu_gfx_tci_ras_mem_id { + AMDGPU_GFX_TCIW_MEM = 1, +}; + +enum amdgpu_gfx_tcp_ras_mem_id { + AMDGPU_GFX_TCP_LFIFO0 = 1, + AMDGPU_GFX_TCP_SET0BANK0_RAM, + AMDGPU_GFX_TCP_SET0BANK1_RAM, + AMDGPU_GFX_TCP_SET0BANK2_RAM, + AMDGPU_GFX_TCP_SET0BANK3_RAM, + AMDGPU_GFX_TCP_SET1BANK0_RAM, + AMDGPU_GFX_TCP_SET1BANK1_RAM, + AMDGPU_GFX_TCP_SET1BANK2_RAM, + AMDGPU_GFX_TCP_SET1BANK3_RAM, + AMDGPU_GFX_TCP_SET2BANK0_RAM, + AMDGPU_GFX_TCP_SET2BANK1_RAM, + AMDGPU_GFX_TCP_SET2BANK2_RAM, + AMDGPU_GFX_TCP_SET2BANK3_RAM, + AMDGPU_GFX_TCP_SET3BANK0_RAM, + AMDGPU_GFX_TCP_SET3BANK1_RAM, + AMDGPU_GFX_TCP_SET3BANK2_RAM, + AMDGPU_GFX_TCP_SET3BANK3_RAM, + AMDGPU_GFX_TCP_VM_FIFO, + AMDGPU_GFX_TCP_DB_TAGRAM0, + AMDGPU_GFX_TCP_DB_TAGRAM1, + AMDGPU_GFX_TCP_DB_TAGRAM2, + AMDGPU_GFX_TCP_DB_TAGRAM3, + AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, + AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, + AMDGPU_GFX_TCP_CMD_FIFO, +}; + +enum amdgpu_gfx_td_ras_mem_id { + AMDGPU_GFX_TD_UTD_CS_FIFO_MEM = 1, + AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, + AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, +}; + +enum amdgpu_gfx_tcx_ras_mem_id { + AMDGPU_GFX_TCX_FIFOD0 = 0, + AMDGPU_GFX_TCX_FIFOD1, + AMDGPU_GFX_TCX_FIFOD2, + AMDGPU_GFX_TCX_FIFOD3, + AMDGPU_GFX_TCX_FIFOD4, + AMDGPU_GFX_TCX_FIFOD5, + AMDGPU_GFX_TCX_FIFOD6, + AMDGPU_GFX_TCX_FIFOD7, + AMDGPU_GFX_TCX_FIFOB0, + AMDGPU_GFX_TCX_FIFOB1, + AMDGPU_GFX_TCX_FIFOB2, + AMDGPU_GFX_TCX_FIFOB3, + AMDGPU_GFX_TCX_FIFOB4, + AMDGPU_GFX_TCX_FIFOB5, + AMDGPU_GFX_TCX_FIFOB6, + AMDGPU_GFX_TCX_FIFOB7, + AMDGPU_GFX_TCX_FIFOA0, + AMDGPU_GFX_TCX_FIFOA1, + AMDGPU_GFX_TCX_FIFOA2, + AMDGPU_GFX_TCX_FIFOA3, + AMDGPU_GFX_TCX_FIFOA4, + AMDGPU_GFX_TCX_FIFOA5, + AMDGPU_GFX_TCX_FIFOA6, + AMDGPU_GFX_TCX_FIFOA7, + AMDGPU_GFX_TCX_CFIFO0, + AMDGPU_GFX_TCX_CFIFO1, + AMDGPU_GFX_TCX_CFIFO2, + AMDGPU_GFX_TCX_CFIFO3, + AMDGPU_GFX_TCX_CFIFO4, + AMDGPU_GFX_TCX_CFIFO5, + AMDGPU_GFX_TCX_CFIFO6, + AMDGPU_GFX_TCX_CFIFO7, + AMDGPU_GFX_TCX_FIFO_ACKB0, + AMDGPU_GFX_TCX_FIFO_ACKB1, + AMDGPU_GFX_TCX_FIFO_ACKB2, + AMDGPU_GFX_TCX_FIFO_ACKB3, + AMDGPU_GFX_TCX_FIFO_ACKB4, + AMDGPU_GFX_TCX_FIFO_ACKB5, + AMDGPU_GFX_TCX_FIFO_ACKB6, + AMDGPU_GFX_TCX_FIFO_ACKB7, + AMDGPU_GFX_TCX_FIFO_ACKD0, + AMDGPU_GFX_TCX_FIFO_ACKD1, + AMDGPU_GFX_TCX_FIFO_ACKD2, + AMDGPU_GFX_TCX_FIFO_ACKD3, + AMDGPU_GFX_TCX_FIFO_ACKD4, + AMDGPU_GFX_TCX_FIFO_ACKD5, + AMDGPU_GFX_TCX_FIFO_ACKD6, + AMDGPU_GFX_TCX_FIFO_ACKD7, + AMDGPU_GFX_TCX_DST_FIFOA0, + AMDGPU_GFX_TCX_DST_FIFOA1, + AMDGPU_GFX_TCX_DST_FIFOA2, + AMDGPU_GFX_TCX_DST_FIFOA3, + AMDGPU_GFX_TCX_DST_FIFOA4, + AMDGPU_GFX_TCX_DST_FIFOA5, + AMDGPU_GFX_TCX_DST_FIFOA6, + AMDGPU_GFX_TCX_DST_FIFOA7, + AMDGPU_GFX_TCX_DST_FIFOB0, + AMDGPU_GFX_TCX_DST_FIFOB1, + AMDGPU_GFX_TCX_DST_FIFOB2, + AMDGPU_GFX_TCX_DST_FIFOB3, + 
AMDGPU_GFX_TCX_DST_FIFOB4, + AMDGPU_GFX_TCX_DST_FIFOB5, + AMDGPU_GFX_TCX_DST_FIFOB6, + AMDGPU_GFX_TCX_DST_FIFOB7, + AMDGPU_GFX_TCX_DST_FIFOD0, + AMDGPU_GFX_TCX_DST_FIFOD1, + AMDGPU_GFX_TCX_DST_FIFOD2, + AMDGPU_GFX_TCX_DST_FIFOD3, + AMDGPU_GFX_TCX_DST_FIFOD4, + AMDGPU_GFX_TCX_DST_FIFOD5, + AMDGPU_GFX_TCX_DST_FIFOD6, + AMDGPU_GFX_TCX_DST_FIFOD7, + AMDGPU_GFX_TCX_DST_FIFO_ACKB0, + AMDGPU_GFX_TCX_DST_FIFO_ACKB1, + AMDGPU_GFX_TCX_DST_FIFO_ACKB2, + AMDGPU_GFX_TCX_DST_FIFO_ACKB3, + AMDGPU_GFX_TCX_DST_FIFO_ACKB4, + AMDGPU_GFX_TCX_DST_FIFO_ACKB5, + AMDGPU_GFX_TCX_DST_FIFO_ACKB6, + AMDGPU_GFX_TCX_DST_FIFO_ACKB7, + AMDGPU_GFX_TCX_DST_FIFO_ACKD0, + AMDGPU_GFX_TCX_DST_FIFO_ACKD1, + AMDGPU_GFX_TCX_DST_FIFO_ACKD2, + AMDGPU_GFX_TCX_DST_FIFO_ACKD3, + AMDGPU_GFX_TCX_DST_FIFO_ACKD4, + AMDGPU_GFX_TCX_DST_FIFO_ACKD5, + AMDGPU_GFX_TCX_DST_FIFO_ACKD6, + AMDGPU_GFX_TCX_DST_FIFO_ACKD7, +}; + +enum amdgpu_gfx_atc_l2_ras_mem_id { + AMDGPU_GFX_ATC_L2_MEM0 = 0, +}; + +enum amdgpu_gfx_utcl2_ras_mem_id { + AMDGPU_GFX_UTCL2_MEM0 = 0, +}; + +enum amdgpu_gfx_vml2_ras_mem_id { + AMDGPU_GFX_VML2_MEM0 = 0, +}; + +enum amdgpu_gfx_vml2_walker_ras_mem_id { + AMDGPU_GFX_VML2_WALKER_MEM0 = 0, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_cp_mem_list[] = { + {AMDGPU_GFX_CP_MEM1, "CP_MEM1"}, + {AMDGPU_GFX_CP_MEM2, "CP_MEM2"}, + {AMDGPU_GFX_CP_MEM3, "CP_MEM3"}, + {AMDGPU_GFX_CP_MEM4, "CP_MEM4"}, + {AMDGPU_GFX_CP_MEM5, "CP_MEM5"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcea_mem_list[] = { + {AMDGPU_GFX_GCEA_IOWR_CMDMEM, "GCEA_IOWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_IORD_CMDMEM, "GCEA_IORD_CMDMEM"}, + {AMDGPU_GFX_GCEA_GMIWR_CMDMEM, "GCEA_GMIWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_GMIRD_CMDMEM, "GCEA_GMIRD_CMDMEM"}, + {AMDGPU_GFX_GCEA_DRAMWR_CMDMEM, "GCEA_DRAMWR_CMDMEM"}, + {AMDGPU_GFX_GCEA_DRAMRD_CMDMEM, "GCEA_DRAMRD_CMDMEM"}, + {AMDGPU_GFX_GCEA_MAM_DMEM0, "GCEA_MAM_DMEM0"}, + {AMDGPU_GFX_GCEA_MAM_DMEM1, "GCEA_MAM_DMEM1"}, + {AMDGPU_GFX_GCEA_MAM_DMEM2, "GCEA_MAM_DMEM2"}, + {AMDGPU_GFX_GCEA_MAM_DMEM3, "GCEA_MAM_DMEM3"}, + {AMDGPU_GFX_GCEA_MAM_AMEM0, "GCEA_MAM_AMEM0"}, + {AMDGPU_GFX_GCEA_MAM_AMEM1, "GCEA_MAM_AMEM1"}, + {AMDGPU_GFX_GCEA_MAM_AMEM2, "GCEA_MAM_AMEM2"}, + {AMDGPU_GFX_GCEA_MAM_AMEM3, "GCEA_MAM_AMEM3"}, + {AMDGPU_GFX_GCEA_MAM_AFLUSH_BUFFER, "GCEA_MAM_AFLUSH_BUFFER"}, + {AMDGPU_GFX_GCEA_WRET_TAGMEM, "GCEA_WRET_TAGMEM"}, + {AMDGPU_GFX_GCEA_RRET_TAGMEM, "GCEA_RRET_TAGMEM"}, + {AMDGPU_GFX_GCEA_IOWR_DATAMEM, "GCEA_IOWR_DATAMEM"}, + {AMDGPU_GFX_GCEA_GMIWR_DATAMEM, "GCEA_GMIWR_DATAMEM"}, + {AMDGPU_GFX_GCEA_DRAM_DATAMEM, "GCEA_DRAM_DATAMEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gc_cane_mem_list[] = { + {AMDGPU_GFX_GC_CANE_MEM0, "GC_CANE_MEM0"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gcutcl2_mem_list[] = { + {AMDGPU_GFX_GCUTCL2_MEM2P512X95, "GCUTCL2_MEM2P512X95"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_gds_mem_list[] = { + {AMDGPU_GFX_GDS_MEM0, "GDS_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_lds_mem_list[] = { + {AMDGPU_GFX_LDS_BANK0, "LDS_BANK0"}, + {AMDGPU_GFX_LDS_BANK1, "LDS_BANK1"}, + {AMDGPU_GFX_LDS_BANK2, "LDS_BANK2"}, + {AMDGPU_GFX_LDS_BANK3, "LDS_BANK3"}, + {AMDGPU_GFX_LDS_BANK4, "LDS_BANK4"}, + {AMDGPU_GFX_LDS_BANK5, "LDS_BANK5"}, + {AMDGPU_GFX_LDS_BANK6, "LDS_BANK6"}, + {AMDGPU_GFX_LDS_BANK7, "LDS_BANK7"}, + {AMDGPU_GFX_LDS_BANK8, "LDS_BANK8"}, + {AMDGPU_GFX_LDS_BANK9, "LDS_BANK9"}, + {AMDGPU_GFX_LDS_BANK10, "LDS_BANK10"}, + {AMDGPU_GFX_LDS_BANK11, "LDS_BANK11"}, + 
{AMDGPU_GFX_LDS_BANK12, "LDS_BANK12"}, + {AMDGPU_GFX_LDS_BANK13, "LDS_BANK13"}, + {AMDGPU_GFX_LDS_BANK14, "LDS_BANK14"}, + {AMDGPU_GFX_LDS_BANK15, "LDS_BANK15"}, + {AMDGPU_GFX_LDS_BANK16, "LDS_BANK16"}, + {AMDGPU_GFX_LDS_BANK17, "LDS_BANK17"}, + {AMDGPU_GFX_LDS_BANK18, "LDS_BANK18"}, + {AMDGPU_GFX_LDS_BANK19, "LDS_BANK19"}, + {AMDGPU_GFX_LDS_BANK20, "LDS_BANK20"}, + {AMDGPU_GFX_LDS_BANK21, "LDS_BANK21"}, + {AMDGPU_GFX_LDS_BANK22, "LDS_BANK22"}, + {AMDGPU_GFX_LDS_BANK23, "LDS_BANK23"}, + {AMDGPU_GFX_LDS_BANK24, "LDS_BANK24"}, + {AMDGPU_GFX_LDS_BANK25, "LDS_BANK25"}, + {AMDGPU_GFX_LDS_BANK26, "LDS_BANK26"}, + {AMDGPU_GFX_LDS_BANK27, "LDS_BANK27"}, + {AMDGPU_GFX_LDS_BANK28, "LDS_BANK28"}, + {AMDGPU_GFX_LDS_BANK29, "LDS_BANK29"}, + {AMDGPU_GFX_LDS_BANK30, "LDS_BANK30"}, + {AMDGPU_GFX_LDS_BANK31, "LDS_BANK31"}, + {AMDGPU_GFX_LDS_SP_BUFFER_A, "LDS_SP_BUFFER_A"}, + {AMDGPU_GFX_LDS_SP_BUFFER_B, "LDS_SP_BUFFER_B"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_rlc_mem_list[] = { + {AMDGPU_GFX_RLC_GPMF32, "RLC_GPMF32"}, + {AMDGPU_GFX_RLC_RLCVF32, "RLC_RLCVF32"}, + {AMDGPU_GFX_RLC_SCRATCH, "RLC_SCRATCH"}, + {AMDGPU_GFX_RLC_SRM_ARAM, "RLC_SRM_ARAM"}, + {AMDGPU_GFX_RLC_SRM_DRAM, "RLC_SRM_DRAM"}, + {AMDGPU_GFX_RLC_TCTAG, "RLC_TCTAG"}, + {AMDGPU_GFX_RLC_SPM_SE, "RLC_SPM_SE"}, + {AMDGPU_GFX_RLC_SPM_GRBMT, "RLC_SPM_GRBMT"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sp_mem_list[] = { + {AMDGPU_GFX_SP_SIMDID0, "SP_SIMDID0"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_spi_mem_list[] = { + {AMDGPU_GFX_SPI_MEM0, "SPI_MEM0"}, + {AMDGPU_GFX_SPI_MEM1, "SPI_MEM1"}, + {AMDGPU_GFX_SPI_MEM2, "SPI_MEM2"}, + {AMDGPU_GFX_SPI_MEM3, "SPI_MEM3"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sqc_mem_list[] = { + {AMDGPU_GFX_SQC_INST_CACHE_A, "SQC_INST_CACHE_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_B, "SQC_INST_CACHE_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_TAG_A, "SQC_INST_CACHE_TAG_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_TAG_B, "SQC_INST_CACHE_TAG_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_A, "SQC_INST_CACHE_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_MISS_FIFO_B, "SQC_INST_CACHE_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_A, "SQC_INST_CACHE_GATCL1_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_INST_CACHE_GATCL1_MISS_FIFO_B, "SQC_INST_CACHE_GATCL1_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_A, "SQC_DATA_CACHE_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_B, "SQC_DATA_CACHE_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_TAG_A, "SQC_DATA_CACHE_TAG_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_TAG_B, "SQC_DATA_CACHE_TAG_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_A, "SQC_DATA_CACHE_MISS_FIFO_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_MISS_FIFO_B, "SQC_DATA_CACHE_MISS_FIFO_B"}, + {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_A, "SQC_DATA_CACHE_HIT_FIFO_A"}, + {AMDGPU_GFX_SQC_DATA_CACHE_HIT_FIFO_B, "SQC_DATA_CACHE_HIT_FIFO_B"}, + {AMDGPU_GFX_SQC_DIRTY_BIT_A, "SQC_DIRTY_BIT_A"}, + {AMDGPU_GFX_SQC_DIRTY_BIT_B, "SQC_DIRTY_BIT_B"}, + {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU0, "SQC_WRITE_DATA_BUFFER_CU0"}, + {AMDGPU_GFX_SQC_WRITE_DATA_BUFFER_CU1, "SQC_WRITE_DATA_BUFFER_CU1"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_A"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B, "SQC_UTCL1_MISS_LFIFO_DATA_CACHE_B"}, + {AMDGPU_GFX_SQC_UTCL1_MISS_LFIFO_INST_CACHE, "SQC_UTCL1_MISS_LFIFO_INST_CACHE"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_sq_mem_list[] = { + {AMDGPU_GFX_SQ_SGPR_MEM0, "SQ_SGPR_MEM0"}, + {AMDGPU_GFX_SQ_SGPR_MEM1, "SQ_SGPR_MEM1"}, + 
{AMDGPU_GFX_SQ_SGPR_MEM2, "SQ_SGPR_MEM2"}, + {AMDGPU_GFX_SQ_SGPR_MEM3, "SQ_SGPR_MEM3"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_ta_mem_list[] = { + {AMDGPU_GFX_TA_FS_AFIFO_RAM_LO, "TA_FS_AFIFO_RAM_LO"}, + {AMDGPU_GFX_TA_FS_AFIFO_RAM_HI, "TA_FS_AFIFO_RAM_HI"}, + {AMDGPU_GFX_TA_FS_CFIFO_RAM, "TA_FS_CFIFO_RAM"}, + {AMDGPU_GFX_TA_FSX_LFIFO, "TA_FSX_LFIFO"}, + {AMDGPU_GFX_TA_FS_DFIFO_RAM, "TA_FS_DFIFO_RAM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcc_mem_list[] = { + {AMDGPU_GFX_TCC_MEM1, "TCC_MEM1"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tca_mem_list[] = { + {AMDGPU_GFX_TCA_MEM1, "TCA_MEM1"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tci_mem_list[] = { + {AMDGPU_GFX_TCIW_MEM, "TCIW_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcp_mem_list[] = { + {AMDGPU_GFX_TCP_LFIFO0, "TCP_LFIFO0"}, + {AMDGPU_GFX_TCP_SET0BANK0_RAM, "TCP_SET0BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK1_RAM, "TCP_SET0BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK2_RAM, "TCP_SET0BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET0BANK3_RAM, "TCP_SET0BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK0_RAM, "TCP_SET1BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK1_RAM, "TCP_SET1BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK2_RAM, "TCP_SET1BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET1BANK3_RAM, "TCP_SET1BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK0_RAM, "TCP_SET2BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK1_RAM, "TCP_SET2BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK2_RAM, "TCP_SET2BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET2BANK3_RAM, "TCP_SET2BANK3_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK0_RAM, "TCP_SET3BANK0_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK1_RAM, "TCP_SET3BANK1_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK2_RAM, "TCP_SET3BANK2_RAM"}, + {AMDGPU_GFX_TCP_SET3BANK3_RAM, "TCP_SET3BANK3_RAM"}, + {AMDGPU_GFX_TCP_VM_FIFO, "TCP_VM_FIFO"}, + {AMDGPU_GFX_TCP_DB_TAGRAM0, "TCP_DB_TAGRAM0"}, + {AMDGPU_GFX_TCP_DB_TAGRAM1, "TCP_DB_TAGRAM1"}, + {AMDGPU_GFX_TCP_DB_TAGRAM2, "TCP_DB_TAGRAM2"}, + {AMDGPU_GFX_TCP_DB_TAGRAM3, "TCP_DB_TAGRAM3"}, + {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE0, "TCP_UTCL1_LFIFO_PROBE0"}, + {AMDGPU_GFX_TCP_UTCL1_LFIFO_PROBE1, "TCP_UTCL1_LFIFO_PROBE1"}, + {AMDGPU_GFX_TCP_CMD_FIFO, "TCP_CMD_FIFO"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_td_mem_list[] = { + {AMDGPU_GFX_TD_UTD_CS_FIFO_MEM, "TD_UTD_CS_FIFO_MEM"}, + {AMDGPU_GFX_TD_UTD_SS_FIFO_LO_MEM, "TD_UTD_SS_FIFO_LO_MEM"}, + {AMDGPU_GFX_TD_UTD_SS_FIFO_HI_MEM, "TD_UTD_SS_FIFO_HI_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_tcx_mem_list[] = { + {AMDGPU_GFX_TCX_FIFOD0, "TCX_FIFOD0"}, + {AMDGPU_GFX_TCX_FIFOD1, "TCX_FIFOD1"}, + {AMDGPU_GFX_TCX_FIFOD2, "TCX_FIFOD2"}, + {AMDGPU_GFX_TCX_FIFOD3, "TCX_FIFOD3"}, + {AMDGPU_GFX_TCX_FIFOD4, "TCX_FIFOD4"}, + {AMDGPU_GFX_TCX_FIFOD5, "TCX_FIFOD5"}, + {AMDGPU_GFX_TCX_FIFOD6, "TCX_FIFOD6"}, + {AMDGPU_GFX_TCX_FIFOD7, "TCX_FIFOD7"}, + {AMDGPU_GFX_TCX_FIFOB0, "TCX_FIFOB0"}, + {AMDGPU_GFX_TCX_FIFOB1, "TCX_FIFOB1"}, + {AMDGPU_GFX_TCX_FIFOB2, "TCX_FIFOB2"}, + {AMDGPU_GFX_TCX_FIFOB3, "TCX_FIFOB3"}, + {AMDGPU_GFX_TCX_FIFOB4, "TCX_FIFOB4"}, + {AMDGPU_GFX_TCX_FIFOB5, "TCX_FIFOB5"}, + {AMDGPU_GFX_TCX_FIFOB6, "TCX_FIFOB6"}, + {AMDGPU_GFX_TCX_FIFOB7, "TCX_FIFOB7"}, + {AMDGPU_GFX_TCX_FIFOA0, "TCX_FIFOA0"}, + {AMDGPU_GFX_TCX_FIFOA1, "TCX_FIFOA1"}, + {AMDGPU_GFX_TCX_FIFOA2, "TCX_FIFOA2"}, + {AMDGPU_GFX_TCX_FIFOA3, "TCX_FIFOA3"}, + {AMDGPU_GFX_TCX_FIFOA4, "TCX_FIFOA4"}, + {AMDGPU_GFX_TCX_FIFOA5, "TCX_FIFOA5"}, + {AMDGPU_GFX_TCX_FIFOA6, "TCX_FIFOA6"}, + 
{AMDGPU_GFX_TCX_FIFOA7, "TCX_FIFOA7"}, + {AMDGPU_GFX_TCX_CFIFO0, "TCX_CFIFO0"}, + {AMDGPU_GFX_TCX_CFIFO1, "TCX_CFIFO1"}, + {AMDGPU_GFX_TCX_CFIFO2, "TCX_CFIFO2"}, + {AMDGPU_GFX_TCX_CFIFO3, "TCX_CFIFO3"}, + {AMDGPU_GFX_TCX_CFIFO4, "TCX_CFIFO4"}, + {AMDGPU_GFX_TCX_CFIFO5, "TCX_CFIFO5"}, + {AMDGPU_GFX_TCX_CFIFO6, "TCX_CFIFO6"}, + {AMDGPU_GFX_TCX_CFIFO7, "TCX_CFIFO7"}, + {AMDGPU_GFX_TCX_FIFO_ACKB0, "TCX_FIFO_ACKB0"}, + {AMDGPU_GFX_TCX_FIFO_ACKB1, "TCX_FIFO_ACKB1"}, + {AMDGPU_GFX_TCX_FIFO_ACKB2, "TCX_FIFO_ACKB2"}, + {AMDGPU_GFX_TCX_FIFO_ACKB3, "TCX_FIFO_ACKB3"}, + {AMDGPU_GFX_TCX_FIFO_ACKB4, "TCX_FIFO_ACKB4"}, + {AMDGPU_GFX_TCX_FIFO_ACKB5, "TCX_FIFO_ACKB5"}, + {AMDGPU_GFX_TCX_FIFO_ACKB6, "TCX_FIFO_ACKB6"}, + {AMDGPU_GFX_TCX_FIFO_ACKB7, "TCX_FIFO_ACKB7"}, + {AMDGPU_GFX_TCX_FIFO_ACKD0, "TCX_FIFO_ACKD0"}, + {AMDGPU_GFX_TCX_FIFO_ACKD1, "TCX_FIFO_ACKD1"}, + {AMDGPU_GFX_TCX_FIFO_ACKD2, "TCX_FIFO_ACKD2"}, + {AMDGPU_GFX_TCX_FIFO_ACKD3, "TCX_FIFO_ACKD3"}, + {AMDGPU_GFX_TCX_FIFO_ACKD4, "TCX_FIFO_ACKD4"}, + {AMDGPU_GFX_TCX_FIFO_ACKD5, "TCX_FIFO_ACKD5"}, + {AMDGPU_GFX_TCX_FIFO_ACKD6, "TCX_FIFO_ACKD6"}, + {AMDGPU_GFX_TCX_FIFO_ACKD7, "TCX_FIFO_ACKD7"}, + {AMDGPU_GFX_TCX_DST_FIFOA0, "TCX_DST_FIFOA0"}, + {AMDGPU_GFX_TCX_DST_FIFOA1, "TCX_DST_FIFOA1"}, + {AMDGPU_GFX_TCX_DST_FIFOA2, "TCX_DST_FIFOA2"}, + {AMDGPU_GFX_TCX_DST_FIFOA3, "TCX_DST_FIFOA3"}, + {AMDGPU_GFX_TCX_DST_FIFOA4, "TCX_DST_FIFOA4"}, + {AMDGPU_GFX_TCX_DST_FIFOA5, "TCX_DST_FIFOA5"}, + {AMDGPU_GFX_TCX_DST_FIFOA6, "TCX_DST_FIFOA6"}, + {AMDGPU_GFX_TCX_DST_FIFOA7, "TCX_DST_FIFOA7"}, + {AMDGPU_GFX_TCX_DST_FIFOB0, "TCX_DST_FIFOB0"}, + {AMDGPU_GFX_TCX_DST_FIFOB1, "TCX_DST_FIFOB1"}, + {AMDGPU_GFX_TCX_DST_FIFOB2, "TCX_DST_FIFOB2"}, + {AMDGPU_GFX_TCX_DST_FIFOB3, "TCX_DST_FIFOB3"}, + {AMDGPU_GFX_TCX_DST_FIFOB4, "TCX_DST_FIFOB4"}, + {AMDGPU_GFX_TCX_DST_FIFOB5, "TCX_DST_FIFOB5"}, + {AMDGPU_GFX_TCX_DST_FIFOB6, "TCX_DST_FIFOB6"}, + {AMDGPU_GFX_TCX_DST_FIFOB7, "TCX_DST_FIFOB7"}, + {AMDGPU_GFX_TCX_DST_FIFOD0, "TCX_DST_FIFOD0"}, + {AMDGPU_GFX_TCX_DST_FIFOD1, "TCX_DST_FIFOD1"}, + {AMDGPU_GFX_TCX_DST_FIFOD2, "TCX_DST_FIFOD2"}, + {AMDGPU_GFX_TCX_DST_FIFOD3, "TCX_DST_FIFOD3"}, + {AMDGPU_GFX_TCX_DST_FIFOD4, "TCX_DST_FIFOD4"}, + {AMDGPU_GFX_TCX_DST_FIFOD5, "TCX_DST_FIFOD5"}, + {AMDGPU_GFX_TCX_DST_FIFOD6, "TCX_DST_FIFOD6"}, + {AMDGPU_GFX_TCX_DST_FIFOD7, "TCX_DST_FIFOD7"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB0, "TCX_DST_FIFO_ACKB0"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB1, "TCX_DST_FIFO_ACKB1"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB2, "TCX_DST_FIFO_ACKB2"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB3, "TCX_DST_FIFO_ACKB3"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB4, "TCX_DST_FIFO_ACKB4"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB5, "TCX_DST_FIFO_ACKB5"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB6, "TCX_DST_FIFO_ACKB6"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKB7, "TCX_DST_FIFO_ACKB7"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD0, "TCX_DST_FIFO_ACKD0"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD1, "TCX_DST_FIFO_ACKD1"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD2, "TCX_DST_FIFO_ACKD2"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD3, "TCX_DST_FIFO_ACKD3"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD4, "TCX_DST_FIFO_ACKD4"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD5, "TCX_DST_FIFO_ACKD5"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD6, "TCX_DST_FIFO_ACKD6"}, + {AMDGPU_GFX_TCX_DST_FIFO_ACKD7, "TCX_DST_FIFO_ACKD7"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_atc_l2_mem_list[] = { + {AMDGPU_GFX_ATC_L2_MEM, "ATC_L2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_utcl2_mem_list[] = { + {AMDGPU_GFX_UTCL2_MEM, "UTCL2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry 
gfx_v9_4_3_ras_vml2_mem_list[] = { + {AMDGPU_GFX_VML2_MEM, "VML2_MEM"}, +}; + +static const struct amdgpu_ras_memory_id_entry gfx_v9_4_3_ras_vml2_walker_mem_list[] = { + {AMDGPU_GFX_VML2_WALKER_MEM, "VML2_WALKER_MEM"}, +}; + +static const struct amdgpu_gfx_ras_mem_id_entry gfx_v9_4_3_ras_mem_list_array[AMDGPU_GFX_MEM_TYPE_NUM] = { + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_cp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcea_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gc_cane_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gcutcl2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_gds_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_lds_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_rlc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_spi_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sqc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_sq_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_ta_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcc_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tca_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tci_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcp_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_td_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_tcx_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_atc_l2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_utcl2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_mem_list) + AMDGPU_GFX_MEMID_ENT(gfx_v9_4_3_ras_vml2_walker_mem_list) +}; + +static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = { + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_CE_ERR_STATUS_LOW, regRLC_CE_ERR_STATUS_HIGH), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"}, + AMDGPU_GFX_RLC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_CE_ERR_STATUS_LO, regCPC_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_CE_ERR_STATUS_LO, regCPF_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_CE_ERR_STATUS_LO, regCPG_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_CE_ERR_STATUS_LO, regGDS_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"}, + AMDGPU_GFX_GDS_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_CE_ERR_STATUS_LO, regGC_CANE_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"}, + AMDGPU_GFX_GC_CANE_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, + AMDGPU_GFX_SPI_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, + AMDGPU_GFX_SQ_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI), + 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, + 
AMDGPU_GFX_SQC_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, + AMDGPU_GFX_TCX_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_CE_ERR_STATUS_LO, regTCC_CE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"}, + AMDGPU_GFX_TCC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, + AMDGPU_GFX_TA_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG), + 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + AMDGPU_GFX_TCI_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, + AMDGPU_GFX_TCP_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, + AMDGPU_GFX_TD_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"}, + AMDGPU_GFX_GCEA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, + AMDGPU_GFX_LDS_MEM, 1}, +}; + +static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regRLC_UE_ERR_STATUS_LOW, regRLC_UE_ERR_STATUS_HIGH), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "RLC"}, + AMDGPU_GFX_RLC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPC_UE_ERR_STATUS_LO, regCPC_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPC"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPF_UE_ERR_STATUS_LO, regCPF_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPF"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regCPG_UE_ERR_STATUS_LO, regCPG_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CPG"}, + AMDGPU_GFX_CP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGDS_UE_ERR_STATUS_LO, regGDS_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GDS"}, + AMDGPU_GFX_GDS_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGC_CANE_UE_ERR_STATUS_LO, regGC_CANE_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "CANE"}, + AMDGPU_GFX_GC_CANE_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, + AMDGPU_GFX_SPI_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, + AMDGPU_GFX_SP_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, + AMDGPU_GFX_SQ_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI), + 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, + AMDGPU_GFX_SQC_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, 
regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, + AMDGPU_GFX_TCX_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCC_UE_ERR_STATUS_LO, regTCC_UE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCC"}, + AMDGPU_GFX_TCC_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, + AMDGPU_GFX_TA_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG), + 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + AMDGPU_GFX_TCI_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, + AMDGPU_GFX_TCP_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, + AMDGPU_GFX_TD_MEM, 8}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI), + 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"}, + AMDGPU_GFX_TCA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_UE_ERR_STATUS_LO, regGCEA_UE_ERR_STATUS_HI), + 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"}, + AMDGPU_GFX_GCEA_MEM, 1}, + {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI), + 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, + AMDGPU_GFX_LDS_MEM, 1}, +}; + +static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 +}; + +static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + unsigned long ce_count = 0, ue_count = 0; + uint32_t i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 || + gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ce_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ce_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, + &ce_count); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); + + /* the caller should make sure initialize value of + * err_data->ue_count and err_data->ce_count + */ + err_data->ce_count += ce_count; + err_data->ue_count += ue_count; +} + +static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + uint32_t i, j, k; + + mutex_lock(&adev->grbm_idx_mutex); + + 
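	/*
	 * The loop below mirrors gfx_v9_4_3_inst_query_ras_err_count(): it
	 * walks gfx_v9_4_3_ce_reg_list, and for any register entry with more
	 * than one shader engine or register instance it first points the
	 * GRBM index at that specific SE/instance. Both the CE entry and the
	 * UE entry at the same index are then reset, and the GRBM index is
	 * restored to broadcast (0xffffffff) once all entries are done.
	 */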
for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ce_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ce_reg_list[i].se_num > 1 || + gfx_v9_4_3_ce_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ce_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); + } + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t i, j; + uint32_t reg_value; + + mutex_lock(&adev->grbm_idx_mutex); + + for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { + gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); + reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regGCEA_ERR_STATUS); + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + dev_warn(adev->dev, + "GCEA err detected at instance: %d, status: 0x%x!\n", + j, reg_value); + } + /* clear after read */ + reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, + reg_value); + } + } + + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); + } + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); + } + + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regVML2_WALKER_MEM_ECC_STATUS); + if (data) { + dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, + 0x3); + } +} + +static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev, + uint32_t status, int xcc_id) +{ + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + uint32_t i, simd, wave; + uint32_t wave_status; + uint32_t wave_pc_lo, wave_pc_hi; + uint32_t wave_exec_lo, wave_exec_hi; + uint32_t wave_inst_dw0, wave_inst_dw1; + uint32_t wave_ib_sts; + + for (i = 0; i < 32; i++) { + if (!((i << 1) & status)) + continue; + + simd = i / cu_info->max_waves_per_simd; + wave = i % cu_info->max_waves_per_simd; + + wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); + wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); + wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); + wave_exec_lo = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); + wave_exec_hi = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); 
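		/*
		 * The rest of the wave context is fetched through the same
		 * indirect SQ register interface: the current instruction
		 * DWORDs (SQ_WAVE_INST_DW0/DW1) and the IB status word. All of
		 * it is then reported in a single dev_info() line so a timed
		 * out wave can be tied back to its PC and instruction stream.
		 */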
+ wave_inst_dw0 = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); + wave_inst_dw1 = + wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); + wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); + + dev_info( + adev->dev, + "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n", + simd, wave, wave_status, + ((uint64_t)wave_pc_hi << 32 | wave_pc_lo), + ((uint64_t)wave_exec_hi << 32 | wave_exec_lo), + ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0), + wave_ib_sts); + } +} + +static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t se_idx, sh_idx, cu_idx; + uint32_t status; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { + for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { + for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { + gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, + cu_idx, xcc_id); + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS); + if (status != 0) { + dev_info( + adev->dev, + "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n", + se_idx, sh_idx, cu_idx); + gfx_v9_4_3_log_cu_timeout_status( + adev, status, xcc_id); + } + /* clear old status */ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS, 0); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); + gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); + gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); +} + +static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); +} + +static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t i, j; + uint32_t value; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { + for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { + gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); + value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS); + value = REG_SET_FIELD(value, GCEA_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x1); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, + int xcc_id) +{ + uint32_t se_idx, sh_idx, cu_idx; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { + for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { + for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { + gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, + cu_idx, xcc_id); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), + regSQ_TIMEOUT_STATUS, 0); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void 
gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); + gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); + gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); +} + +static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_gfx_ras_error_func(adev, ras_error_status, + gfx_v9_4_3_inst_query_ras_err_count); +} + +static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); +} + +static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); +} + +static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); +} + +static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { + .name = "gfx_v9_4_3", + .early_init = gfx_v9_4_3_early_init, + .late_init = gfx_v9_4_3_late_init, + .sw_init = gfx_v9_4_3_sw_init, + .sw_fini = gfx_v9_4_3_sw_fini, + .hw_init = gfx_v9_4_3_hw_init, + .hw_fini = gfx_v9_4_3_hw_fini, + .suspend = gfx_v9_4_3_suspend, + .resume = gfx_v9_4_3_resume, + .is_idle = gfx_v9_4_3_is_idle, + .wait_for_idle = gfx_v9_4_3_wait_for_idle, + .soft_reset = gfx_v9_4_3_soft_reset, + .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, + .set_powergating_state = gfx_v9_4_3_set_powergating_state, + .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_4_3_ring_get_rptr_compute, + .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, + .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ + 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ + 8 + 8 + 8 + /* gfx_v9_4_3_ring_emit_fence x3 for user fence, vm fence */ + 7 + /* gfx_v9_4_3_emit_mem_sync */ + 5 + /* gfx_v9_4_3_emit_wave_limit for updating regSPI_WCL_PIPE_PERCENT_GFX register */ + 15, /* for updating 3 regSPI_WCL_PIPE_PERCENT_CS registers */ + .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ + .emit_ib = gfx_v9_4_3_ring_emit_ib_compute, + .emit_fence = gfx_v9_4_3_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_4_3_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_4_3_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_4_3_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_4_3_ring_emit_hdp_flush, + .test_ring = gfx_v9_4_3_ring_test_ring, + .test_ib = gfx_v9_4_3_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v9_4_3_ring_emit_wreg, + .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, + .emit_mem_sync = gfx_v9_4_3_emit_mem_sync, + .emit_wave_limit = gfx_v9_4_3_emit_wave_limit, +}; + +static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = 
gfx_v9_4_3_ring_get_rptr_compute, + .get_wptr = gfx_v9_4_3_ring_get_wptr_compute, + .set_wptr = gfx_v9_4_3_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_4_3_ring_emit_gds_switch */ + 7 + /* gfx_v9_4_3_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v9_4_3_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v9_4_3_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_4_3_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v9_4_3_ring_emit_ib_compute */ + .emit_fence = gfx_v9_4_3_ring_emit_fence_kiq, + .test_ring = gfx_v9_4_3_ring_test_ring, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v9_4_3_ring_emit_rreg, + .emit_wreg = gfx_v9_4_3_ring_emit_wreg, + .emit_reg_wait = gfx_v9_4_3_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_4_3_ring_emit_reg_write_reg_wait, +}; + +static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { + adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; + + for (j = 0; j < adev->gfx.num_compute_rings; j++) + adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs + = &gfx_v9_4_3_ring_funcs_compute; + } +} + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_eop_irq_funcs = { + .set = gfx_v9_4_3_set_eop_interrupt_state, + .process = gfx_v9_4_3_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_reg_irq_funcs = { + .set = gfx_v9_4_3_set_priv_reg_fault_state, + .process = gfx_v9_4_3_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_4_3_priv_inst_irq_funcs = { + .set = gfx_v9_4_3_set_priv_inst_fault_state, + .process = gfx_v9_4_3_priv_inst_irq, +}; + +static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v9_4_3_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v9_4_3_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v9_4_3_priv_inst_irq_funcs; +} + +static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) +{ + adev->gfx.rlc.funcs = &gfx_v9_4_3_rlc_funcs; +} + + +static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) +{ + /* init asci gds info */ + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + /* 9.4.3 removed all the GDS internal memory, + * only support GWS opcode in kernel, like barrier + * semaphore.etc */ + adev->gds.gds_size = 0; + break; + default: + adev->gds.gds_size = 0x10000; + break; + } + + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 4, 3): + /* deprecated for 9.4.3, no usage at all */ + adev->gds.gds_compute_max_wave_id = 0; + break; + default: + /* this really depends on the chip */ + adev->gds.gds_compute_max_wave_id = 0x7ff; + break; + } + + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; +} + +static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + + WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, GET_INST(GC, 
0), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); + + return (~data) & mask; +} + +static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + unsigned disable_masks[4 * 4]; + + if (!adev || !cu_info) + return -EINVAL; + + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. + * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; + } + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + 0); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 9, + .minor = 4, + .rev = 0, + .funcs = &gfx_v9_4_3_ip_funcs, +}; + +static int gfx_v9_4_3_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp_mask; + int i, r; + + /* TODO : Initialize golden regs */ + /* gfx_v9_4_3_init_golden_registers(adev); */ + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) + gfx_v9_4_3_xcc_constants_init(adev, i); + + if (!amdgpu_sriov_vf(adev)) { + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v9_4_3_xcc_rlc_resume(adev, i); + if (r) + return r; + } + } + + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { + r = gfx_v9_4_3_xcc_cp_resume(adev, i); + if (r) + return r; + } + + return 0; +} + +static int gfx_v9_4_3_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for_each_inst(i, inst_mask) + gfx_v9_4_3_xcc_fini(adev, i); + + return 0; +} + +struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { + .suspend = &gfx_v9_4_3_xcp_suspend, + 
.resume = &gfx_v9_4_3_xcp_resume +}; + +struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { + .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count, + .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, + .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status, + .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status, +}; + +struct amdgpu_gfx_ras gfx_v9_4_3_ras = { + .ras_block = { + .hw_ops = &gfx_v9_4_3_ras_ops, + }, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h index 84e69701b81a..42d67ee0e7ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.h @@ -24,7 +24,8 @@ #ifndef __GFX_V9_4_3_H__ #define __GFX_V9_4_3_H__ -extern const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs; -extern const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs; +extern const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block; + +extern struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs; #endif /* __GFX_V9_4_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ab2325f6c7ac..d94cc1ec7242 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -247,7 +247,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -307,7 +307,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -338,7 +338,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -411,7 +411,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index c59c6c85fbff..4dabf910334b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -21,6 +21,7 @@ * */ #include "amdgpu.h" +#include "amdgpu_xcp.h" #include "gfxhub_v1_2.h" #include "gfxhub_v1_1.h" @@ -35,227 +36,288 @@ static u64 gfxhub_v1_2_get_mc_fb_offset(struct amdgpu_device *adev) { - return (u64)RREG32_SOC15(GC, 0, regMC_VM_FB_OFFSET) << 24; + return (u64)RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_FB_OFFSET) << 24; +} + +static void gfxhub_v1_2_xcc_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t 
vmid, + uint64_t page_table_base, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); + } } static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; - - WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - hub->ctx_addr_distance * vmid, - lower_32_bits(page_table_base)); + uint32_t xcc_mask; - WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - hub->ctx_addr_distance * vmid, - upper_32_bits(page_table_base)); + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, vmid, page_table_base, xcc_mask); } -static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_init_gart_aperture_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) { uint64_t pt_base; + int i; if (adev->gmc.pdb0_bo) pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); else pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - gfxhub_v1_2_setup_vm_pt_regs(adev, 0, pt_base); + gfxhub_v1_2_xcc_setup_vm_pt_regs(adev, 0, pt_base, xcc_mask); /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - if (adev->gmc.pdb0_bo) { - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); - - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->gmc.gart_end >> 44)); - } else { - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.gart_start >> 12)); - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.gart_start >> 44)); - - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(GC, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->gmc.gart_end >> 44)); + for_each_inst(i, xcc_mask) { + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.fb_start >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.fb_start >> 44)); + + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } else { + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } } } -static void gfxhub_v1_2_init_system_aperture_regs(struct 
amdgpu_device *adev) +static void +gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) { uint64_t value; uint32_t tmp; + int i; - /* Program the AGP BAR */ - WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BASE, 0); - WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15_RLC(GC, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - - if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { - /* Program the system aperture low logical page number. */ - WREG32_SOC15_RLC(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - - if (adev->apu_flags & AMD_APU_IS_RAVEN2) - /* - * Raven2 has a HW issue that it is unable to use the - * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. - * So here is the workaround that increase system - * aperture high address (add 1) to get rid of the VM - * fault and hardware hang. - */ - WREG32_SOC15_RLC(GC, 0, - regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.fb_end >> 18) + 0x1, - adev->gmc.agp_end >> 18)); - else - WREG32_SOC15_RLC(GC, 0, - regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - - /* Set default page address. */ - value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); - WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, - (u32)(value >> 12)); - WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, - (u32)(value >> 44)); - - /* Program "protection fault". */ - WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page_addr >> 44)); - - tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); - WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp); - } - - /* In the case squeezing vram into GART aperture, we don't use - * FB aperture and AGP aperture. Disable them. - */ - if (adev->gmc.pdb0_bo) { - WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_TOP, 0); - WREG32_SOC15(GC, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); - WREG32_SOC15(GC, 0, regMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, regMC_VM_AGP_BOT, 0xFFFFFF); - WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); - WREG32_SOC15(GC, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); + for_each_inst(i, xcc_mask) { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { + /* Program the system aperture low logical page number. */ + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + /* + * Raven2 has a HW issue that it is unable to use the + * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. + * So here is the workaround that increase system + * aperture high address (add 1) to get rid of the VM + * fault and hardware hang. 
+ */ + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max((adev->gmc.fb_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); + else + WREG32_SOC15_RLC(GC, GET_INST(GC, i), + regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL2, tmp); + } + + /* In the case squeezing vram into GART aperture, we don't use + * FB aperture and AGP aperture. Disable them. + */ + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_AGP_BOT, 0xFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); + } } } -static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_init_tlb_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) { uint32_t tmp; + int i; - /* Setup TLB control */ - tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL); - - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 1); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - MTYPE, MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - - WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); + for_each_inst(i, xcc_mask) { + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_MX_L1_TLB_CNTL, tmp); + } } -static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_init_cache_regs(struct amdgpu_device *adev, + uint32_t xcc_mask) { uint32_t tmp; + int i; - /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); - /* XXX for emulation, Refer to closed source code.*/ - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); - WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL, tmp); - - tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL2); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL2, tmp); - - tmp = regVM_L2_CNTL3_DEFAULT; - if (adev->gmc.translate_further) { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 9); - } else { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + for_each_inst(i, xcc_mask) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL2, tmp); + + tmp = regVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL3, tmp); + + tmp = regVM_L2_CNTL4_DEFAULT; + /* For AMD APP APUs setup WC memory */ + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + } + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regVM_L2_CNTL4, tmp); } - WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL3, tmp); - - tmp = regVM_L2_CNTL4_DEFAULT; - if (adev->gmc.xgmi.connected_to_cpu) { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 1); - tmp = 
REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 1); - } else { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); - } - WREG32_SOC15_RLC(GC, 0, regVM_L2_CNTL4, tmp); } -static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_enable_system_domain(struct amdgpu_device *adev, + uint32_t xcc_mask) { uint32_t tmp; + int i; - tmp = RREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, - adev->gmc.vmid0_page_table_depth); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->gmc.vmid0_page_table_block_size); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); - WREG32_SOC15(GC, 0, regVM_CONTEXT0_CNTL, tmp); + for_each_inst(i, xcc_mask) { + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, + adev->gmc.vmid0_page_table_depth); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE, + adev->gmc.vmid0_page_table_block_size); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL, tmp); + } } -static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) +static void +gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev, + uint32_t xcc_mask) { - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, - 0XFFFFFFFF); - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, - 0x0000000F); - - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, - 0); - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, - 0); - - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); - WREG32_SOC15(GC, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + int i; + for_each_inst(i, xcc_mask) { + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, GET_INST(GC, i), + regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + } } -static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, + uint32_t xcc_mask) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub; unsigned num_level, block_size; uint32_t tmp; - int i; + int i, j; num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; @@ -264,124 +326,205 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL, i); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - num_level); - tmp = 
REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, - 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. - * On Aldebaran, XNACK can be enabled in the SQ per-process. - * Retry faults need to be enabled for that to work. - */ - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry || - adev->asic_type == CHIP_ALDEBARAN); - WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT1_CNTL, - i * hub->ctx_distance, tmp); - WREG32_SOC15_OFFSET(GC, 0, - regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, - i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(GC, 0, - regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, - i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(GC, 0, - regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, - i * hub->ctx_addr_distance, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(GC, 0, - regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, - i * hub->ctx_addr_distance, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + num_level); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + block_size); + /* Send no-retry XNACK on fault to suppress VM fault storm. + * On 9.4.2 and 9.4.3, XNACK can be enabled in + * the SQ per-process. + * Retry faults need to be enabled for that to work. 
+ */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !adev->gmc.noretry || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), + regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } } } -static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev, + uint32_t xcc_mask) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; - unsigned i; - - for (i = 0 ; i < 18; ++i) { - WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - i * hub->eng_addr_distance, 0xffffffff); - WREG32_SOC15_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - i * hub->eng_addr_distance, 0x1f); + struct amdgpu_vmhub *hub; + unsigned int i, j; + + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } } } -static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev, + uint32_t xcc_mask) { - if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) { - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15_RLC(GC, 0, regMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); + uint32_t tmp_mask; + int i; + + tmp_mask = xcc_mask; + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + if (amdgpu_sriov_vf(adev)) { + for_each_inst(i, tmp_mask) { + i = ffs(tmp_mask) - 1; + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } } /* GART Enable. 
*/ - gfxhub_v1_2_init_gart_aperture_regs(adev); - gfxhub_v1_2_init_system_aperture_regs(adev); - gfxhub_v1_2_init_tlb_regs(adev); + gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask); + gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask); + gfxhub_v1_2_xcc_init_tlb_regs(adev, xcc_mask); if (!amdgpu_sriov_vf(adev)) - gfxhub_v1_2_init_cache_regs(adev); + gfxhub_v1_2_xcc_init_cache_regs(adev, xcc_mask); - gfxhub_v1_2_enable_system_domain(adev); + gfxhub_v1_2_xcc_enable_system_domain(adev, xcc_mask); if (!amdgpu_sriov_vf(adev)) - gfxhub_v1_2_disable_identity_aperture(adev); - gfxhub_v1_2_setup_vmid_config(adev); - gfxhub_v1_2_program_invalidation(adev); + gfxhub_v1_2_xcc_disable_identity_aperture(adev, xcc_mask); + gfxhub_v1_2_xcc_setup_vmid_config(adev, xcc_mask); + gfxhub_v1_2_xcc_program_invalidation(adev, xcc_mask); return 0; } +static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + return gfxhub_v1_2_xcc_gart_enable(adev, xcc_mask); +} + +static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev, + uint32_t xcc_mask) +{ + struct amdgpu_vmhub *hub; + u32 tmp; + u32 i, j; + + for_each_inst(j, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + } +} + static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v1_2_xcc_gart_disable(adev, xcc_mask); +} + +static void gfxhub_v1_2_xcc_set_fault_enable_default(struct amdgpu_device *adev, + bool value, + uint32_t xcc_mask) +{ u32 tmp; - u32 i; - - /* Disable all tables */ - for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(GC, 0, regVM_CONTEXT0_CNTL, - i * hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(GC, 0, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 0); - WREG32_SOC15_RLC(GC, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); - - /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, 0, regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, 0, regVM_L2_CNTL, tmp); - WREG32_SOC15(GC, 0, regVM_L2_CNTL3, 0); + int i; + + for_each_inst(i, xcc_mask) { + tmp = RREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + 
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL, tmp); + } } /** @@ -393,72 +536,100 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, bool value) { - u32 tmp; - tmp = RREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, - VM_L2_PROTECTION_FAULT_CNTL, - TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, - value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - if (!value) { - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - CRASH_ON_NO_RETRY_FAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - CRASH_ON_RETRY_FAULT, 1); - } - WREG32_SOC15(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp); + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, xcc_mask); } -static void gfxhub_v1_2_init(struct amdgpu_device *adev) +static void gfxhub_v1_2_xcc_init(struct amdgpu_device *adev, uint32_t xcc_mask) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub; + int i; + + for_each_inst(i, xcc_mask) { + hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; - hub->ctx0_ptb_addr_lo32 = - SOC15_REG_OFFSET(GC, 0, + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); - hub->ctx0_ptb_addr_hi32 = - SOC15_REG_OFFSET(GC, 0, + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); - 
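[Editor's note] Each legacy single-hub entry point in these hunks becomes a thin wrapper: it builds a contiguous mask covering every XCC with GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0) and hands it to an _xcc_ helper that walks the set bits with for_each_inst()/GET_INST(). The userspace approximation below replaces those kernel macros with a popcount builtin and a plain loop; names and values are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Contiguous mask of bits [lo, hi]; stand-in for the kernel's GENMASK(). */
static uint32_t genmask32(unsigned int hi, unsigned int lo)
{
	return (~0u >> (31 - hi)) & (~0u << lo);
}

/* Per-instance programming; in the driver this is a WREG32_SOC15() call with
 * GET_INST(GC, i) selecting the register bank of that XCC. */
static void program_xcc(unsigned int i)
{
	printf("programming gfxhub on XCC %u\n", i);
}

int main(void)
{
	uint32_t hw_xcc_mask = 0x3f;	/* six XCCs reported by the hardware */
	unsigned int num_xcc = (unsigned int)__builtin_popcount(hw_xcc_mask);
	uint32_t xcc_mask = genmask32(num_xcc - 1, 0);
	unsigned int i;

	for (i = 0; i < 32; i++)	/* rough for_each_inst(i, xcc_mask) */
		if (xcc_mask & (1u << i))
			program_xcc(i);
	return 0;
}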
hub->vm_inv_eng0_sem = - SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_SEM); - hub->vm_inv_eng0_req = - SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_REQ); - hub->vm_inv_eng0_ack = - SOC15_REG_OFFSET(GC, 0, regVM_INVALIDATE_ENG0_ACK); - hub->vm_context0_cntl = - SOC15_REG_OFFSET(GC, 0, regVM_CONTEXT0_CNTL); - hub->vm_l2_pro_fault_status = - SOC15_REG_OFFSET(GC, 0, regVM_L2_PROTECTION_FAULT_STATUS); - hub->vm_l2_pro_fault_cntl = - SOC15_REG_OFFSET(GC, 0, regVM_L2_PROTECTION_FAULT_CNTL); - - hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL; - hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ; - hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - - regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), + regVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, GET_INST(GC, i), regVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regVM_CONTEXT1_CNTL - + regVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = + regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - + regVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = + regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + } } +static void gfxhub_v1_2_init(struct amdgpu_device *adev) +{ + uint32_t xcc_mask; + + xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); + gfxhub_v1_2_xcc_init(adev, xcc_mask); +} + +static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 max_num_physical_nodes; + u32 max_physical_node_id; + u32 xgmi_lfb_cntl; + u32 max_region; + u64 seg_size; + + xgmi_lfb_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_CNTL); + seg_size = REG_GET_FIELD( + RREG32_SOC15(GC, GET_INST(GC, 0), regMC_VM_XGMI_LFB_SIZE), + MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + max_region = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + + + + max_num_physical_nodes = 8; + max_physical_node_id = 7; + + /* PF_MAX_REGION=0 means xgmi is disabled */ + if (max_region || adev->gmc.xgmi.connected_to_cpu) { + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, + PF_LFB_REGION); + + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) + return -EINVAL; + + adev->gmc.xgmi.node_segment_size = seg_size; + } + + return 0; +} const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, @@ -467,5 +638,38 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .gart_disable = gfxhub_v1_2_gart_disable, .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, - .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, +}; + +static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device 
*adev = (struct amdgpu_device *)handle; + bool value; + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + gfxhub_v1_2_xcc_set_fault_enable_default(adev, value, inst_mask); + + if (!amdgpu_sriov_vf(adev)) + return gfxhub_v1_2_xcc_gart_enable(adev, inst_mask); + + return 0; +} + +static int gfxhub_v1_2_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!amdgpu_sriov_vf(adev)) + gfxhub_v1_2_xcc_gart_disable(adev, inst_mask); + + return 0; +} + +struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs = { + .suspend = &gfxhub_v1_2_xcp_suspend, + .resume = &gfxhub_v1_2_xcp_resume }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h index e2d508f5a7ee..997e9f90c990 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.h @@ -26,4 +26,6 @@ extern const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs; +extern struct amdgpu_xcp_ip_funcs gfxhub_v1_2_xcp_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 9b3a02527318..f173a61c6c15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -282,7 +282,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -331,7 +331,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -360,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -433,7 +433,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = { static void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 4aacbbec31e2..d8fc3e8088cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -123,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -291,7 +291,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -340,7 +340,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -381,7 +381,7 @@ static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -462,7 +462,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = { static void gfxhub_v2_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -651,7 +651,7 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) static void gfxhub_v2_1_halt(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; int time = 1000; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index 13712640fa46..c53147f9c9fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -119,7 +119,7 @@ static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -290,7 +290,7 @@ static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -339,7 +339,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -380,7 +380,7 @@ static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -463,7 +463,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = { static void gfxhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 6e0bd628c889..ae777487d72e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -122,7 +122,7 @@ static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -295,7 +295,7 @@ static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -344,7 +344,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -373,7 +373,7 @@ static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -451,7 +451,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = { static void gfxhub_v3_0_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index b213dcf8ca06..0c8a47989576 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -76,7 +76,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,11 +84,11 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -96,7 +96,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. 
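[Editor's note] From the gmc/gfxhub hunks onward, the fixed AMDGPU_GFXHUB_0 / AMDGPU_MMHUB_0 / AMDGPU_MMHUB_1 indices are replaced by AMDGPU_GFXHUB(x), AMDGPU_MMHUB0(x) and AMDGPU_MMHUB1(x), so adev->vmhub[] can carry one gfxhub slot per XCC in front of the MMHUB slots. The toy layout below only shows the idea; the real offsets live in amdgpu.h and may differ.

#include <stdio.h>

/* Hypothetical vmhub index space: gfxhub instances first, then MMHUB0
 * instances, then MMHUB1.  Values are illustrative only. */
#define MAX_XCC			8
#define AMDGPU_GFXHUB(x)	(x)
#define AMDGPU_MMHUB0(x)	(MAX_XCC + (x))
#define AMDGPU_MMHUB1(x)	(MAX_XCC + 4 + (x))

int main(void)
{
	int vmhub = AMDGPU_MMHUB0(2);

	/* With this ordering, "is it an MMHUB?" becomes a simple compare,
	 * which is what the gmc_v9_0 hunks further down rely on. */
	if (vmhub >= AMDGPU_MMHUB0(0))
		printf("vmhub %d is an MMHUB instance\n", vmhub);
	else
		printf("vmhub %d is a GFXHUB instance\n", vmhub);
	return 0;
}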
*/ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -139,7 +139,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) return 1; } @@ -149,7 +149,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32(hub->vm_l2_pro_fault_status); @@ -212,8 +212,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -249,7 +248,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -284,7 +283,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, hub_ip); @@ -343,7 +342,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* For SRIOV run time, driver shouldn't access the register through MMIO * Directly use kiq to do the vm invalidation instead */ - if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && + if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && down_read_trylock(&adev->reset_domain->sem)) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; @@ -361,19 +360,19 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, mutex_lock(&adev->mman.gtt_window_lock); - if (vmhub == AMDGPU_MMHUB_0) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + if (vmhub == AMDGPU_MMHUB0(0)) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } - BUG_ON(vmhub != AMDGPU_GFXHUB_0); + BUG_ON(vmhub != AMDGPU_GFXHUB(0)); if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || amdgpu_in_reset(adev) || ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -383,7 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself. 
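[Editor's note] gmc_v10_0_flush_gpu_tlb() above picks one of three paths: a KIQ-based invalidation when the KIQ ring is usable, a direct MMIO flush for the MMHUB or when SDMA cannot be used, and otherwise an SDMA job so the GFXHUB flush does not race GFXOFF. The sketch below compresses those checks into booleans; each flag stands for a larger driver-state test and the function is an approximation, not the driver's code.

#include <stdbool.h>
#include <stdio.h>

enum flush_path { FLUSH_VIA_KIQ, FLUSH_VIA_MMIO, FLUSH_VIA_SDMA };

/* Condensed decision from gmc_v10_0_flush_gpu_tlb(). */
static enum flush_path pick_flush_path(bool kiq_ready, bool is_mmhub,
				       bool sdma_usable, bool in_reset)
{
	if (kiq_ready && !in_reset)
		return FLUSH_VIA_KIQ;
	if (is_mmhub || !sdma_usable || in_reset)
		return FLUSH_VIA_MMIO;
	return FLUSH_VIA_SDMA;	/* GFXHUB flush through an SDMA job */
}

int main(void)
{
	printf("%d\n", pick_flush_path(false, false, true, false)); /* SDMA */
	printf("%d\n", pick_flush_path(false, true, true, false));  /* MMIO */
	return 0;
}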
*/ - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity, + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, AMDGPU_FENCE_OWNER_UNDEFINED, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, &job); @@ -415,12 +414,13 @@ error_alloc: * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB() + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; @@ -428,11 +428,11 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; if (amdgpu_emu_mode == 0 && ring->sched.ready) { - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, @@ -440,12 +440,12 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); @@ -461,12 +461,12 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v10_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } if (!adev->enable_mes) break; @@ -534,7 +534,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -929,7 +929,8 @@ static int gmc_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, @@ -1075,9 +1076,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (!adev->in_s0ix) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); if (!adev->in_s0ix) - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 
(unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 4116c112e8a0..c571f0d95994 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -64,7 +64,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -72,11 +72,11 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,7 +84,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -110,7 +110,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if (entry->vmid_src == AMDGPU_GFXHUB_0) + if (entry->vmid_src == AMDGPU_GFXHUB(0)) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); @@ -170,7 +170,7 @@ static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -202,7 +202,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? 
GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -251,7 +251,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, hub->eng_distance * eng, 0, hub_ip); /* Issue additional private vm invalidation to MMHUB */ - if ((vmhub != AMDGPU_GFXHUB_0) && + if ((vmhub != AMDGPU_GFXHUB(0)) && (hub->vm_l2_bank_select_reserved_cid2) && !amdgpu_sriov_vf(adev)) { inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); @@ -284,7 +284,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { - if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron) + if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) return; /* flush hdp cache */ @@ -293,7 +293,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* For SRIOV run time, driver shouldn't access the register through MMIO * Directly use kiq to do the vm invalidation instead */ - if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -319,23 +319,24 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; uint32_t seq; uint16_t queried_pasid; bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; if (amdgpu_emu_mode == 0 && ring->sched.ready) { - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, @@ -343,12 +344,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); @@ -364,12 +365,12 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v11_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { gmc_v11_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } } } @@ -435,7 +436,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + 
vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; @@ -779,7 +780,8 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size, @@ -886,7 +888,7 @@ static int gmc_v11_0_sw_fini(void *handle) static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32(hub->vm_contexts_disable, 0); return; @@ -921,7 +923,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) false : true; adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b7dad4e67813..aa754c95a0b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -808,7 +808,7 @@ static int gmc_v6_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 402960b0174e..acd2b407860f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -419,12 +419,13 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * @pasid: pasid to be flush * @flush_type: type of flush * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid; unsigned int tmp; @@ -977,7 +978,7 @@ static int gmc_v7_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 504c1b34dab7..85dead2a5702 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -617,12 +617,13 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @pasid: pasid to be flush * @flush_type: type of flush * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. 
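[Editor's note] Every flush_gpu_tlb_pasid() implementation in this series (gmc v6/v7/v8/v9/v10/v11) gains a trailing inst argument that selects which KIQ instance issues the invalidation; single-KIQ parts simply keep indexing kiq[0]. Below is a minimal sketch of such a callback-table change, with invented types standing in for struct amdgpu_gmc_funcs and struct amdgpu_device.

#include <stdint.h>
#include <stdio.h>

struct dev;	/* stand-in for struct amdgpu_device */

/* The callback now carries the KIQ instance to use. */
struct gmc_funcs {
	int (*flush_gpu_tlb_pasid)(struct dev *d, uint16_t pasid,
				   uint32_t flush_type, int all_hub,
				   uint32_t inst);
};

static int flush_pasid_v9(struct dev *d, uint16_t pasid, uint32_t flush_type,
			  int all_hub, uint32_t inst)
{
	(void)d; (void)flush_type; (void)all_hub;
	printf("flushing pasid %u via KIQ %u\n", (unsigned)pasid, (unsigned)inst);
	return 0;
}

int main(void)
{
	struct gmc_funcs funcs = { .flush_gpu_tlb_pasid = flush_pasid_v9 };

	/* Single-KIQ ASICs pass 0; GFX 9.4.3 callers can target other KIQs. */
	return funcs.flush_gpu_tlb_pasid(NULL, 42, 0, 1, 0);
}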
*/ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid; unsigned int tmp; @@ -1093,7 +1094,7 @@ static int gmc_v8_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2fe21cefd772..67e669e0141c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -79,6 +79,7 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 +#define MAX_MEM_RANGES 8 static const char *gfxhub_client_ids[] = { "CB", @@ -481,7 +482,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -491,25 +492,25 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp &= ~bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -519,20 +520,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. 
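[Editor's note] The GMC blocks also drop the adev->num_vmhubs counter in favour of an adev->vmhubs_mask bitmap: sw_init marks the hubs that exist with set_bit(), and the fault/flush code walks them with for_each_set_bit(). A plain-C equivalent of that bookkeeping is shown below, using a uint32_t where the kernel uses a DECLARE_BITMAP() field; the index values are illustrative.

#include <stdint.h>
#include <stdio.h>

#define GFXHUB0		0	/* illustrative hub indices */
#define MMHUB0		8
#define MAX_VMHUBS	13

int main(void)
{
	uint32_t vmhubs_mask = 0;
	unsigned int i;

	/* sw_init: record which hubs this ASIC actually has. */
	vmhubs_mask |= 1u << GFXHUB0;
	vmhubs_mask |= 1u << MMHUB0;

	/* interrupt-state / TLB-flush code: visit only existing hubs. */
	for (i = 0; i < MAX_VMHUBS; i++)
		if (vmhubs_mask & (1u << i))
			printf("touching vmhub %u\n", i);
	return 0;
}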
*/ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp |= bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -556,11 +557,31 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, const char *hub_name; u64 addr; uint32_t cam_index = 0; - int ret; + int ret, xcc_id = 0; + uint32_t node_id; + + node_id = entry->node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + hub_name = "mmhub0"; + hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + hub_name = "mmhub1"; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; + } else { + hub_name = "gfxhub0"; + if (adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + hub = &adev->vmhub[xcc_id]; + } + if (retry_fault) { if (adev->irq.retry_cam_enabled) { /* Delegate it to a different ring if the hardware hasn't @@ -573,7 +594,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault); + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -595,7 +617,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, + addr, write_fault)) return 1; } } @@ -603,16 +626,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - if (entry->client_id == SOC15_IH_CLIENTID_VMC) { - hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB_0]; - } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { - hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB_1]; - } else { - hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; - } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); @@ -628,6 +641,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", + node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, + node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); + if (amdgpu_sriov_vf(adev)) return 0; @@ -636,7 +654,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. 
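[Editor's note] The new fault report for GC 9.4.3 above decodes entry->node_id into a die string: every group of four ids maps to one AID, with offsets 1 and 2 naming XCD0/XCD1 on that AID and offset 3 reserved. The small decoder below reproduces the dev_err() format from the hunk; the grouping itself is taken from that code and nothing further is implied.

#include <stdio.h>

/* Render node_id the same way the fault message does:
 * AID<n>, AID<n>.XCD0, AID<n>.XCD1 or RSV<n>. */
static void format_node(int node_id, char *buf, size_t len)
{
	int aid = node_id / 4;
	int sub = node_id % 4;

	if (sub == 3)
		snprintf(buf, len, "RSV%d", aid);
	else if (sub == 0)
		snprintf(buf, len, "AID%d", aid);
	else
		snprintf(buf, len, "AID%d.XCD%d", aid, sub - 1);
}

int main(void)
{
	char buf[16];
	int id;

	for (id = 0; id < 8; id++) {
		format_node(id, buf, sizeof(buf));
		printf("node_id %d -> %s\n", id, buf);
	}
	return 0;
}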
*/ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32(hub->vm_l2_pro_fault_status); @@ -645,11 +663,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { + if (entry->vmid_src == AMDGPU_GFXHUB(0)) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], @@ -759,8 +776,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) return false; - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0) || + vmhub == AMDGPU_MMHUB1(0)) && (!amdgpu_sriov_vf(adev)) && (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && (adev->apu_flags & AMD_APU_IS_PICASSO)))); @@ -803,7 +820,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, u32 j, inv_req, inv_req2, tmp; struct amdgpu_vmhub *hub; - BUG_ON(vmhub >= adev->num_vmhubs); + BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); hub = &adev->vmhub[vmhub]; if (adev->gmc.xgmi.num_physical_nodes && @@ -816,6 +833,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); + } else if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) { + inv_req = gmc_v9_0_get_invalidate_req(vmid, 0); + inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); } else { inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); inv_req2 = 0; @@ -824,7 +846,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq.ring.sched.ready && + if (adev->gfx.kiq[0].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && down_read_trylock(&adev->reset_domain->sem)) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; @@ -849,11 +871,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -864,27 +885,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - else + if (vmhub >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + else + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to * be cleared to avoid a false ACK due to the new fast * GRBM interface. 
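[Editor's note] gmc_v9_0_flush_gpu_tlb() now sometimes issues two invalidation requests back to back: XGMI-coherent parts get a heavier flush before the requested one, and on GC 9.4.3 rev 0 a flush_type 2 request is preceded by a type 0 flush. The sketch below covers just that request-pair selection; its get_invalidate_req() only labels the type and does not model the real register encoding.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in: the real helper packs vmid/flush_type into a register value. */
static uint32_t get_invalidate_req(uint32_t vmid, uint32_t flush_type)
{
	return (vmid << 8) | flush_type;
}

static void pick_requests(uint32_t vmid, uint32_t flush_type, bool xgmi_wa,
			  bool gc_943_rev0, uint32_t *req, uint32_t *req2)
{
	if (xgmi_wa) {				/* heavier flush first */
		*req = get_invalidate_req(vmid, 2);
		*req2 = get_invalidate_req(vmid, flush_type);
	} else if (flush_type == 2 && gc_943_rev0) {
		*req = get_invalidate_req(vmid, 0);
		*req2 = get_invalidate_req(vmid, flush_type);
	} else {
		*req = get_invalidate_req(vmid, flush_type);
		*req2 = 0;			/* single request */
	}
}

int main(void)
{
	uint32_t r1, r2;

	pick_requests(3, 2, false, true, &r1, &r2);
	printf("req=0x%x req2=0x%x\n", (unsigned)r1, (unsigned)r2);
	return 0;
}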
*/ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -900,10 +920,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); - else + if (vmhub >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + else + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); } spin_unlock(&adev->gmc.invalidate_lock); @@ -921,12 +941,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; @@ -934,8 +955,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; if (amdgpu_in_reset(adev)) return -EIO; @@ -955,24 +976,31 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) ndw += kiq->pmf->invalidate_tlbs_size; - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[inst].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + + if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); up_read(&adev->reset_domain->sem); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); @@ -989,12 +1017,12 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v9_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } break; } @@ -1060,10 +1088,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. */ - if (ring->vm_hub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB1(0)) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1159,13 +1187,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - unsigned int mtype; + struct amdgpu_vm *vm = mapping->bo_va->base.vm; + unsigned int mtype_local, mtype; bool snoop = false; + bool is_local; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): if (is_vram) { if (bo_adev == adev) { if (uncached) @@ -1200,6 +1229,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, snoop = true; } break; + case IP_VERSION(9, 4, 3): + /* Only local VRAM BOs or system memory on non-NUMA APUs + * can be assumed to be local in their entirety. Choose + * MTYPE_NC as safe fallback for all system memory BOs on + * NUMA systems. Their MTYPE can be overridden per-page in + * gmc_v9_0_override_vm_pte_flags. 
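[Editor's note] The new IP_VERSION(9, 4, 3) case chooses the page MTYPE from three inputs: the amdgpu_mtype_local module parameter (RW by default, NC or CC when set to 1 or 2), whether the BO is local to the VM's partition/NUMA node, and whether the device is an APU or dGPU. The decision function below compresses the logic written out in the hunk that follows; the MTYPE_* values here are symbolic, not the hardware encodings.

#include <stdbool.h>
#include <stdio.h>

enum mtype { MTYPE_NC, MTYPE_UC, MTYPE_CC, MTYPE_RW };

static enum mtype local_mtype(int amdgpu_mtype_local)
{
	if (amdgpu_mtype_local == 1)
		return MTYPE_NC;
	if (amdgpu_mtype_local == 2)
		return MTYPE_CC;
	return MTYPE_RW;		/* default */
}

static enum mtype pick_mtype(bool uncached, bool is_apu, bool is_local,
			     bool is_vram, int amdgpu_mtype_local)
{
	if (uncached)
		return MTYPE_UC;
	if (is_apu)
		return is_local ? local_mtype(amdgpu_mtype_local) : MTYPE_NC;
	/* dGPU */
	if (is_local)
		return local_mtype(amdgpu_mtype_local);
	return is_vram ? MTYPE_NC : MTYPE_UC;
}

int main(void)
{
	/* local VRAM BO on a dGPU with the default module parameter */
	printf("%d\n", pick_mtype(false, false, true, true, 0)); /* MTYPE_RW */
	return 0;
}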
+ */ + mtype_local = MTYPE_RW; + if (amdgpu_mtype_local == 1) { + DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + mtype_local = MTYPE_NC; + } else if (amdgpu_mtype_local == 2) { + DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + mtype_local = MTYPE_CC; + } else { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + } + is_local = (!is_vram && (adev->flags & AMD_IS_APU) && + num_possible_nodes() <= 1) || + (is_vram && adev == bo_adev && + KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id); + snoop = true; + if (uncached) { + mtype = MTYPE_UC; + } else if (adev->flags & AMD_IS_APU) { + mtype = is_local ? mtype_local : MTYPE_NC; + } else { + /* dGPU */ + if (is_local) + mtype = mtype_local; + else if (is_vram) + mtype = MTYPE_NC; + else + mtype = MTYPE_UC; + } + + break; default: if (uncached || coherent) mtype = MTYPE_UC; @@ -1241,6 +1307,72 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, mapping, flags); } +static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t addr, uint64_t *flags) +{ + int local_node, nid; + + /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system + * memory can use more efficient MTYPEs. + */ + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + return; + + /* Only direct-mapped memory allows us to determine the NUMA node from + * the DMA address. + */ + if (!adev->ram_is_direct_mapped) { + dev_dbg(adev->dev, "RAM is not direct mapped\n"); + return; + } + + /* Only override mappings with MTYPE_NC, which is the safe default for + * cacheable memory. + */ + if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + dev_dbg(adev->dev, "MTYPE is not NC\n"); + return; + } + + /* FIXME: Only supported on native mode for now. For carve-out, the + * NUMA affinity of the GPU/VM needs to come from the PCI info because + * memory partitions are not associated with different NUMA nodes. + */ + if (adev->gmc.is_app_apu && vm->mem_id >= 0) { + local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node; + } else { + dev_dbg(adev->dev, "Only native mode APU is supported.\n"); + return; + } + + /* Only handle real RAM. Mappings of PCIe resources don't have struct + * page or NUMA nodes. 
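[Editor's note] gmc_v9_0_override_vm_pte_flags() bails out early unless the ASIC is GC 9.4.3, RAM is direct-mapped, and the mapping currently carries MTYPE_NC; only then does it compare the page's NUMA node with the VM partition's node and, on a match, rewrite the MTYPE field to the configured local type. The sketch below mirrors that final rewrite step; the bit positions of the MTYPE field are invented for illustration and do not match AMDGPU_PTE_MTYPE_VG10_MASK.

#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout only. */
#define PTE_MTYPE_SHIFT	48
#define PTE_MTYPE_MASK	(3ull << PTE_MTYPE_SHIFT)
#define MTYPE_NC	0ull
#define MTYPE_RW	3ull

/* Upgrade an MTYPE_NC mapping to the local MTYPE when the backing page
 * lives on the same NUMA node as the VM's memory partition. */
static uint64_t override_pte_flags(uint64_t flags, int page_nid,
				   int local_node, uint64_t mtype_local)
{
	if ((flags & PTE_MTYPE_MASK) != (MTYPE_NC << PTE_MTYPE_SHIFT))
		return flags;		/* only the safe NC default is touched */
	if (page_nid != local_node)
		return flags;		/* remote memory stays NC */
	return (flags & ~PTE_MTYPE_MASK) | (mtype_local << PTE_MTYPE_SHIFT);
}

int main(void)
{
	uint64_t flags = MTYPE_NC << PTE_MTYPE_SHIFT;

	flags = override_pte_flags(flags, 1, 1, MTYPE_RW);
	printf("flags = 0x%llx\n", (unsigned long long)flags);
	return 0;
}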
+ */ + if (!page_is_ram(addr >> PAGE_SHIFT)) { + dev_dbg(adev->dev, "Page is not RAM.\n"); + return; + } + nid = pfn_to_nid(addr >> PAGE_SHIFT); + dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", + vm->mem_id, local_node, nid); + if (nid == local_node) { + uint64_t old_flags = *flags; + unsigned int mtype_local = MTYPE_RW; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_CC; + + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype_local); + dev_dbg(adev->dev, "flags updated from %llx to %llx\n", + old_flags, *flags); + } +} + static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); @@ -1283,6 +1415,27 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } +static enum amdgpu_memory_partition +gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev, + supp_modes); + + return mode; +} + +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return AMDGPU_NPS1_PARTITION_MODE; + + return gmc_v9_0_get_memory_partition(adev, NULL); +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, @@ -1291,7 +1444,9 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .map_mtype = gmc_v9_0_map_mtype, .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, + .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, + .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -1372,6 +1527,9 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): adev->mmhub.ras = &mmhub_v1_7_ras; break; + case IP_VERSION(1, 8, 0): + adev->mmhub.ras = &mmhub_v1_8_ras; + break; default: /* mmhub ras is not available */ break; @@ -1419,9 +1577,13 @@ static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */ - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) + /* + * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined + * in their IP discovery tables + */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) adev->gmc.xgmi.supported = true; if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { @@ -1430,6 +1592,20 @@ static int gmc_v9_0_early_init(void *handle) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + enum amdgpu_pkg_type pkg_type = + adev->smuio.funcs->get_pkg_type(adev); + /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present + * and the APU, can be in used two possible modes: + * - carveout mode + * - native APU mode + * "is_app_apu" can be used to identify the APU in the native + * mode. 
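[Editor's note] early_init() now flags GFX 9.4.3 APUs that run in native mode (no carve-out VRAM) by combining the SMUIO package type with the fact that BAR0 has zero length; later hunks key off adev->gmc.is_app_apu to report zero VRAM and to place the GART in system memory. A condensed form of that predicate follows, with the two hardware queries reduced to parameters and the enum values invented for the example.

#include <stdbool.h>
#include <stdio.h>

enum pkg_type { PKG_TYPE_APU, PKG_TYPE_CEM };	/* illustrative subset */

/* Native-mode APU: APU package and no VRAM BAR exposed (BAR0 length == 0). */
static bool detect_app_apu(enum pkg_type pkg, unsigned long bar0_len)
{
	return pkg == PKG_TYPE_APU && bar0_len == 0;
}

int main(void)
{
	printf("native APU:   %d\n", detect_app_apu(PKG_TYPE_APU, 0));
	printf("carveout APU: %d\n", detect_app_apu(PKG_TYPE_APU, 1UL << 30));
	return 0;
}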
+ */ + adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU && + !pci_resource_len(adev->pdev, 0)); + } + gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); gmc_v9_0_set_umc_funcs(adev); @@ -1525,8 +1701,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int r; /* size in MB on si */ - adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + if (!adev->gmc.is_app_apu) { + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + } else { + DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n"); + adev->gmc.mc_vram_size = 0; + } adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU) && @@ -1551,7 +1732,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || + if ((!amdgpu_sriov_vf(adev) && + (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = @@ -1618,12 +1800,18 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; - r = amdgpu_gart_table_vram_alloc(adev); - if (r) - return r; + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU\n"); + r = amdgpu_gart_table_ram_alloc(adev); + if (r) + dev_err(adev->dev, "Failed to allocate GART in system memory\n"); + } else { + r = amdgpu_gart_table_vram_alloc(adev); + if (r) + return r; - if (adev->gmc.xgmi.connected_to_cpu) { - r = amdgpu_gmc_pdb0_alloc(adev); + if (adev->gmc.xgmi.connected_to_cpu) + r = amdgpu_gmc_pdb0_alloc(adev); } return r; @@ -1644,10 +1832,178 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } +static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode; + u32 supp_modes; + bool valid; + + mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware not present in supported modes */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && + !(BIT(mode - 1) & supp_modes)) + return false; + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 1); + break; + case AMDGPU_NPS2_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 2); + break; + case AMDGPU_NPS4_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 3 || + adev->gmc.num_mem_partitions == 4); + break; + default: + valid = false; + } + + return valid; +} + +static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid) +{ + int i; + + /* Check if node with id 'nid' is present in 'node_ids' array */ + for (i = 0; i < num_ids; ++i) + if (node_ids[i] == nid) + return true; + + return false; +} + +static void +gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + int num_ranges = 0, ret, mem_groups; + struct amdgpu_numa_info numa_info; + int node_ids[MAX_MEM_RANGES]; + int num_xcc, xcc_id; + uint32_t xcc_mask; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + xcc_mask = (1U << num_xcc) - 1; + mem_groups = hweight32(adev->aid_mask); + + for_each_inst(xcc_id, xcc_mask) { + ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + if (ret) + continue; + + if (numa_info.nid == NUMA_NO_NODE) { + 
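/* No NUMA affinity reported for this XCC: fall back to a single + * memory range and stop scanning. + */ +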
mem_ranges[0].size = numa_info.size; + mem_ranges[0].numa.node = numa_info.nid; + num_ranges = 1; + break; + } + + if (gmc_v9_0_is_node_present(node_ids, num_ranges, + numa_info.nid)) + continue; + + node_ids[num_ranges] = numa_info.nid; + mem_ranges[num_ranges].numa.node = numa_info.nid; + mem_ranges[num_ranges].size = numa_info.size; + ++num_ranges; + } + + adev->gmc.num_mem_partitions = num_ranges; + + /* If there is only partition, don't use entire size */ + if (adev->gmc.num_mem_partitions == 1) { + mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1); + do_div(mem_ranges[0].size, mem_groups); + } +} + +static void +gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + enum amdgpu_memory_partition mode; + u32 start_addr = 0, size; + int i; + + mode = gmc_v9_0_query_memory_partition(adev); + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + adev->gmc.num_mem_partitions = 1; + break; + case AMDGPU_NPS2_PARTITION_MODE: + adev->gmc.num_mem_partitions = 2; + break; + case AMDGPU_NPS4_PARTITION_MODE: + if (adev->flags & AMD_IS_APU) + adev->gmc.num_mem_partitions = 3; + else + adev->gmc.num_mem_partitions = 4; + break; + default: + adev->gmc.num_mem_partitions = 1; + break; + } + + size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } + + /* Adjust the last one */ + mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn = + (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; + mem_ranges[adev->gmc.num_mem_partitions - 1].size = + adev->gmc.real_vram_size - + ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn + << AMDGPU_GPU_PAGE_SHIFT); +} + +static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) +{ + bool valid; + + adev->gmc.mem_partitions = kzalloc( + MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); + + if (!adev->gmc.mem_partitions) + return -ENOMEM; + + /* TODO : Get the range from PSP/Discovery for dGPU */ + if (adev->gmc.is_app_apu) + gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); + else + gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + + if (amdgpu_sriov_vf(adev)) + valid = true; + else + valid = gmc_v9_0_validate_partition_info(adev); + if (!valid) { + /* TODO: handle invalid case */ + dev_WARN(adev->dev, + "Mem ranges not matching with hardware config"); + } + + return 0; +} + static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned long inst_mask = adev->aid_mask; adev->gfxhub.funcs->init(adev); @@ -1655,38 +2011,54 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (amdgpu_sriov_vf(adev)) - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - else if (amdgpu_emu_mode != 1) - adev->gmc.vram_width = vram_width; + if (!(adev->bios) || adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { + if 
(adev->gmc.is_app_apu) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; + adev->gmc.vram_width = 64 * 64; + } + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; - if (!adev->gmc.vram_width) { - int chansize, numchan; + if (!adev->gmc.vram_width) { + int chansize, numchan; - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - if (adev->df.funcs && - adev->df.funcs->get_hbm_channel_number) { - numchan = adev->df.funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + if (adev->df.funcs && + adev->df.funcs->get_hbm_channel_number) { + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } } - } - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -1702,9 +2074,8 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): - adev->num_vmhubs = 2; - + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, @@ -1720,12 +2091,24 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): - adev->num_vmhubs = 3; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask); /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; + case IP_VERSION(9, 4, 3): + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); + + inst_mask <<= AMDGPU_MMHUB0(0); + bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32); + + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + adev->gmc.translate_further = adev->vm_manager.num_level > 1; + break; default: break; } @@ -1764,7 +2147,7 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44; + dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 
48:44; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); @@ -1778,6 +2161,12 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + r = gmc_v9_0_init_mem_ranges(adev); + if (r) + return r; + } + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -1810,6 +2199,9 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_init(adev); + return 0; } @@ -1817,10 +2209,20 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - amdgpu_gart_table_vram_free(adev); + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU free\n"); + amdgpu_gart_table_ram_free(adev); + } else { + amdgpu_gart_table_vram_free(adev); + } amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); @@ -1946,8 +2348,8 @@ static int gmc_v9_0_hw_init(void *handle) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); } - for (i = 0; i < adev->num_vmhubs; ++i) { - if (adev->in_s0ix && (i == AMDGPU_GFXHUB_0)) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { + if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index a3076eb8af6a..77595e9622da 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -437,7 +437,7 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 126: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -460,6 +460,7 @@ int jpeg_v1_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; jpeg_v1_0_set_dec_ring_funcs(adev); jpeg_v1_0_set_irq_funcs(adev); @@ -484,15 +485,15 @@ int jpeg_v1_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring = adev->jpeg.inst->ring_dec; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; - adev->jpeg.internal.jpeg_pitch = adev->jpeg.inst->external.jpeg_pitch = + adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; @@ -509,7 +510,7 @@ void jpeg_v1_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec); + amdgpu_ring_fini(adev->jpeg.inst->ring_dec); } /** @@ -522,7 +523,7 @@ void jpeg_v1_0_sw_fini(void *handle) */ void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; if (mode == 0) 
{ WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); @@ -579,7 +580,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 0eddf7c824a7..c25d4a07350b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -49,6 +49,7 @@ static int jpeg_v2_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; jpeg_v2_0_set_dec_ring_funcs(adev); jpeg_v2_0_set_irq_funcs(adev); @@ -83,18 +84,18 @@ static int jpeg_v2_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; } @@ -129,7 +130,7 @@ static int jpeg_v2_0_sw_fini(void *handle) static int jpeg_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -312,7 +313,7 @@ static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev) */ static int jpeg_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -729,7 +730,7 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -791,7 +792,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 73e0dc5a10cd..aadb74de52bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -60,6 +60,7 @@ static int jpeg_v2_5_early_init(void *handle) u32 harvest; int i; + adev->jpeg.num_jpeg_rings = 1; adev->jpeg.num_jpeg_inst = JPEG25_MAX_HW_INSTANCES_ARCTURUS; for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { harvest = RREG32_SOC15(JPEG, i, mmCC_UVD_HARVESTING); @@ -125,12 +126,12 @@ static int jpeg_v2_5_sw_init(void *handle) if 
(adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, @@ -138,8 +139,8 @@ static int jpeg_v2_5_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst[i].external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); } r = amdgpu_jpeg_ras_sw_init(adev); @@ -186,7 +187,7 @@ static int jpeg_v2_5_hw_init(void *handle) if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); @@ -329,7 +330,7 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; /* disable anti hang mechanism */ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); @@ -602,7 +603,7 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec); + amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -719,10 +720,10 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; if (adev->asic_type == CHIP_ARCTURUS) - adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs; else /* CHIP_ALDEBARAN */ - adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_6_dec_ring_vm_funcs; - adev->jpeg.inst[i].ring_dec.me = i; + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->me = i; DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i); } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 1c2292cc5f2c..79791379fc2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -64,6 +64,7 @@ static int jpeg_v3_0_early_init(void *handle) } adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; jpeg_v3_0_set_dec_ring_funcs(adev); jpeg_v3_0_set_irq_funcs(adev); @@ -98,18 +99,18 @@ static int jpeg_v3_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - 
adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; } @@ -144,7 +145,7 @@ static int jpeg_v3_0_sw_fini(void *handle) static int jpeg_v3_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -330,7 +331,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int jpeg_v3_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -527,7 +528,7 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -589,7 +590,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v3_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index a3d83c9f2c9a..a707d407fbd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -58,6 +58,7 @@ static int jpeg_v4_0_early_init(void *handle) adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; jpeg_v4_0_set_dec_ring_funcs(adev); jpeg_v4_0_set_irq_funcs(adev); @@ -105,10 +106,10 @@ static int jpeg_v4_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? 
(((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, @@ -116,8 +117,8 @@ static int jpeg_v4_0_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); r = amdgpu_jpeg_ras_sw_init(adev); if (r) @@ -156,7 +157,7 @@ static int jpeg_v4_0_sw_fini(void *handle) static int jpeg_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (amdgpu_sriov_vf(adev)) { @@ -364,7 +365,7 @@ static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int jpeg_v4_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -442,7 +443,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) table_size = 0; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW), @@ -687,7 +688,7 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", @@ -749,7 +750,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs; DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c new file mode 100644 index 000000000000..ce2b22f7e4e4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -0,0 +1,1074 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_4_0_3_offset.h" +#include "vcn/vcn_4_0_3_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +enum jpeg_engin_status { + UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, +}; + +static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state); +static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); + +static int amdgpu_ih_srcid_jpeg[] = { + VCN_4_0__SRCID__JPEG_DECODE, + VCN_4_0__SRCID__JPEG1_DECODE, + VCN_4_0__SRCID__JPEG2_DECODE, + VCN_4_0__SRCID__JPEG3_DECODE, + VCN_4_0__SRCID__JPEG4_DECODE, + VCN_4_0__SRCID__JPEG5_DECODE, + VCN_4_0__SRCID__JPEG6_DECODE, + VCN_4_0__SRCID__JPEG7_DECODE +}; + +/** + * jpeg_v4_0_3_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v4_0_3_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + + jpeg_v4_0_3_set_dec_ring_funcs(adev); + jpeg_v4_0_3_set_irq_funcs(adev); + jpeg_v4_0_3_set_ras_funcs(adev); + + return 0; +} + +/** + * jpeg_v4_0_3_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v4_0_3_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[j] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[j] = + SOC15_REG_OFFSET1( + JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_SCRATCH0, + (j ? 
(0x40 * j - 0xc80) : 0)); + } + } + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + r = amdgpu_jpeg_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize jpeg ras block!\n"); + return r; + } + } + + return 0; +} + +/** + * jpeg_v4_0_3_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v4_0_3_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v4_0_3_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v4_0_3_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + ring = adev->jpeg.inst[i].ring_dec; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * jpeg_inst, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET( + VCN, GET_INST(VCN, i), + regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index + << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v4_0_3_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v4_0_3_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return ret; +} + +/** + * jpeg_v4_0_3_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v4_0_3_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v4_0_3_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v4_0_3_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v4_0_3_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v4_0_3_hw_init(adev); + + return r; +} + +static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) +{ + int i, jpeg_inst; + uint32_t data; + + jpeg_inst = GET_INST(JPEG, inst_idx); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1)); + } else { + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); + + data = 
RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) + data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); +} + +static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) +{ + int i, jpeg_inst; + uint32_t data; + + jpeg_inst = GET_INST(JPEG, inst_idx); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1); + } else { + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); +} + +/** + * jpeg_v4_0_3_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_3_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG( + JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON + << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regUVD_JPEG_POWER_STATUS), + 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v4_0_3_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); + + ring = &adev->jpeg.inst[i].ring_dec[j]; + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << j, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET( + JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET( + JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + 
regUVD_JRBC0_UVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET( + JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset); + } + } + + return 0; +} + +/** + * jpeg_v4_0_3_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) +{ + int i, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_3_enable_clock_gating(adev, i); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG( + JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF + << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR, + ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); +} + +/** + * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); +} + +/** + * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + (ring->pipe ? (0x40 * ring->pipe - 0xc80) : + 0), + lower_32_bits(ring->wptr)); + } +} + +/** + * jpeg_v4_0_3_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80004000); +} + +/** + * jpeg_v4_0_3_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. 
+ */ +static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00004000); +} + +/** + * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @addr: address + * @seq: sequence number + * @flags: fence related flags + * + * Write a fence and a trap command to the ring. + */ +static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned int flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + if (ring->adev->jpeg.inst[ring->me].aid_id) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x4); + } else { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + if (ring->adev->jpeg.inst[ring->me].aid_id) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x0); + } else { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from + * @ib: indirect buffer to execute + * @flags: unused + * + * Write ring commands to execute the indirect buffer. 
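+ * + * The packet sequence below programs the JRBC and JPEG VMIDs, the IB base + * address and size, and then waits on UVD_JRBC_STATUS using a conditional + * TYPE3 packet.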
+ */ +static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned int vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 
0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static bool jpeg_v4_0_3_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool ret = false; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); + + ret &= ((RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, i), + regUVD_JRBC0_UVD_JRBC_STATUS, + reg_offset) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + + return ret; +} + +static int jpeg_v4_0_3_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); + + ret &= SOC15_WAIT_ON_RREG_OFFSET( + JPEG, GET_INST(JPEG, i), + regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + return ret; +} + +static int jpeg_v4_0_3_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (enable) { + if (!jpeg_v4_0_3_is_idle(handle)) + return -EBUSY; + jpeg_v4_0_3_enable_clock_gating(adev, i); + } else { + jpeg_v4_0_3_disable_clock_gating(adev, i); + } + } + return 0; +} + +static int jpeg_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_stop(adev); + else + ret = jpeg_v4_0_3_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); + break; + case VCN_4_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); + break; + case VCN_4_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); + break; + case VCN_4_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); + break; + case VCN_4_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); + break; + case VCN_4_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); + break; + case VCN_4_0__SRCID__JPEG6_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); + break; + case VCN_4_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { + .name = "jpeg_v4_0_3", + .early_init = jpeg_v4_0_3_early_init, + .late_init = NULL, + .sw_init = jpeg_v4_0_3_sw_init, + .sw_fini = jpeg_v4_0_3_sw_fini, + .hw_init = jpeg_v4_0_3_hw_init, + .hw_fini = jpeg_v4_0_3_hw_fini, + .suspend = jpeg_v4_0_3_suspend, + .resume = jpeg_v4_0_3_resume, + .is_idle = jpeg_v4_0_3_is_idle, + .wait_for_idle = jpeg_v4_0_3_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, + .set_powergating_state = jpeg_v4_0_3_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, + .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, + .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 
22, /* jpeg_v4_0_3_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; + } + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { + .set = jpeg_v4_0_3_set_interrupt_state, + .process = jpeg_v4_0_3_process_interrupt, +}; + +static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + } + adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 4, + .minor = 0, + .rev = 3, + .funcs = &jpeg_v4_0_3_ip_funcs, +}; + +static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, regVCN_UE_ERR_STATUS_HI_JPEG3D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, 
regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"}, +}; + +static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev, + uint32_t jpeg_inst, + void *ras_err_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + + /* jpeg v4_0_3 only support uncorrectable errors */ + amdgpu_ras_inst_query_ras_error_count(adev, + jpeg_v4_0_3_ue_reg_list, + ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), + NULL, 0, GET_INST(VCN, jpeg_inst), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &err_data->ue_count); +} + +static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev, + void *ras_err_status) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + dev_warn(adev->dev, "JPEG RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) + jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status); +} + +static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev, + uint32_t jpeg_inst) +{ + amdgpu_ras_inst_reset_ras_error_count(adev, + jpeg_v4_0_3_ue_reg_list, + ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), + GET_INST(VCN, jpeg_inst)); +} + +static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + dev_warn(adev->dev, "JPEG RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) + jpeg_v4_0_3_inst_reset_ras_error_count(adev, i); +} + +static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { + .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count, + .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, +}; + +static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = { + .ras_block = { + .hw_ops = &jpeg_v4_0_3_ras_hw_ops, + }, +}; + +static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.ras = &jpeg_v4_0_3_ras; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h new file mode 100644 index 000000000000..22483dc66351 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -0,0 +1,51 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V4_0_3_H__ +#define __JPEG_V4_0_3_H__ + +#define regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x404d +#define regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x404e +#define regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x404f +#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ab +#define regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ac +#define regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40a4 +#define regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40a6 +#define regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40b6 +#define regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40b7 +#define regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 +#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x42d4 +#define regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x42d5 +#define regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 +#define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 +#define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 +#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043 +#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094 +#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe + +#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 + +extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; + +#endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 2e2062636d5f..36a123e6c8ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -149,7 +149,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -632,6 +632,8 @@ static int mes_v10_1_mqd_init(struct amdgpu_ring *ring) uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; + memset(mqd, 0, sizeof(*mqd)); + mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; @@ -728,6 +730,7 @@ static int mes_v10_1_mqd_init(struct amdgpu_ring *ring) /* offset: 184 - this is used for CP_HQD_GFX_CONTROL */ mqd->cp_hqd_suspend_cntl_stack_offset = tmp; + 
amdgpu_device_flush_hdp(ring->adev, NULL); return 0; } @@ -797,8 +800,8 @@ static void mes_v10_1_queue_init_register(struct amdgpu_ring *ring) static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; int r; if (!kiq->pmf || !kiq->pmf->kiq_map_queues) @@ -812,13 +815,7 @@ static int mes_v10_1_kiq_enable_queue(struct amdgpu_device *adev) kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); - r = amdgpu_ring_test_ring(kiq_ring); - if (r) { - DRM_ERROR("kfq enable failed\n"); - kiq_ring->sched.ready = false; - } - - return r; + return amdgpu_ring_test_helper(kiq_ring); } static int mes_v10_1_queue_init(struct amdgpu_device *adev) @@ -863,9 +860,9 @@ static int mes_v10_1_kiq_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - spin_lock_init(&adev->gfx.kiq.ring_lock); + spin_lock_init(&adev->gfx.kiq[0].ring_lock); - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; ring->me = 3; ring->pipe = 1; @@ -891,7 +888,7 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; if (pipe == AMDGPU_MES_KIQ_PIPE) - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) ring = &adev->mes.ring; else @@ -901,6 +898,7 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev, return 0; r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { @@ -974,15 +972,15 @@ static int mes_v10_1_sw_fini(void *handle) amdgpu_ucode_release(&adev->mes.fw[pipe]); } - amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj, - &adev->gfx.kiq.ring.mqd_gpu_addr, - &adev->gfx.kiq.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, &adev->mes.ring.mqd_gpu_addr, &adev->mes.ring.mqd_ptr); - amdgpu_ring_fini(&adev->gfx.kiq.ring); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); amdgpu_ring_fini(&adev->mes.ring); amdgpu_mes_fini(adev); @@ -1038,7 +1036,7 @@ static int mes_v10_1_kiq_hw_init(struct amdgpu_device *adev) mes_v10_1_enable(adev, true); - mes_v10_1_kiq_setting(&adev->gfx.kiq.ring); + mes_v10_1_kiq_setting(&adev->gfx.kiq[0].ring); r = mes_v10_1_queue_init(adev); if (r) @@ -1090,7 +1088,7 @@ static int mes_v10_1_hw_init(void *handle) * MES uses KIQ ring exclusively so driver cannot access KIQ ring * with MES enabled. 
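
A recurring mechanical change in the MES hunks above, and across this series, is that the single KIQ bookkeeping structure becomes an array indexed by instance, with existing single-instance users pinned to index 0 (adev->gfx.kiq.ring becomes adev->gfx.kiq[0].ring). A compressed illustration of that refactor shape, using invented placeholder types rather than the real amdgpu_gfx and amdgpu_kiq definitions:

#include <stddef.h>

#define MAX_XCC 8	/* illustrative upper bound, not the driver's constant */

struct kiq {
	int ring_id;
};

/* Before: one KIQ per device.  After: one KIQ per compute partition. */
struct gfx_old { struct kiq kiq; };
struct gfx_new { struct kiq kiq[MAX_XCC]; size_t num_xcc; };

/* Accessor that makes the instance explicit for future multi-XCC callers. */
static struct kiq *gfx_kiq(struct gfx_new *gfx, size_t xcc)
{
	return &gfx->kiq[xcc];
}

static void init_new(struct gfx_new *gfx, size_t num_xcc)
{
	size_t i;

	gfx->num_xcc = num_xcc;
	for (i = 0; i < num_xcc; i++)
		gfx->kiq[i].ring_id = (int)i;
}

int main(void)
{
	struct gfx_old o = { .kiq = { .ring_id = 0 } };
	struct gfx_new n;

	init_new(&n, 1);
	/* Callers that only ever used one KIQ keep working via index 0. */
	return gfx_kiq(&n, 0)->ring_id - o.kiq.ring_id;
}

The payoff comes later: code that genuinely runs per compute partition can pass a real instance number, while the old single-instance paths compile unchanged apart from the added [0].
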
*/ - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; adev->mes.ring.sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 45280f047180..1bdaa00c0b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -164,7 +164,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -202,17 +202,14 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; + mes_add_queue_pkt.trap_en = input->trap_en; + mes_add_queue_pkt.skip_process_ctx_clear = input->skip_process_ctx_clear; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; mes_add_queue_pkt.gds_size = input->queue_size; - if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) && - (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) && - (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3)))) - mes_add_queue_pkt.trap_en = 1; - /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; mes_add_queue_pkt.gds_size = input->queue_size; @@ -339,6 +336,19 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; break; + case MES_MISC_OP_SET_SHADER_DEBUGGER: + misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER; + misc_pkt.set_shader_debugger.process_context_addr = + input->set_shader_debugger.process_context_addr; + misc_pkt.set_shader_debugger.flags.u32all = + input->set_shader_debugger.flags.u32all; + misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl = + input->set_shader_debugger.spi_gdbg_per_vmid_cntl; + memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl, + input->set_shader_debugger.tcp_watch_cntl, + sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); + misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; + break; default: DRM_ERROR("unsupported misc op (%d) \n", input->op); return -EINVAL; @@ -704,6 +714,8 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring) uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; + memset(mqd, 0, sizeof(*mqd)); + mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; @@ -797,6 +809,7 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_iq_timer = regCP_HQD_IQ_TIMER_DEFAULT; mqd->cp_hqd_quantum = regCP_HQD_QUANTUM_DEFAULT; + amdgpu_device_flush_hdp(ring->adev, NULL); return 0; } @@ -864,8 +877,8 @@ static void mes_v11_0_queue_init_register(struct amdgpu_ring *ring) static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring; int r; if 
(!kiq->pmf || !kiq->pmf->kiq_map_queues) @@ -879,12 +892,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); - r = amdgpu_ring_test_ring(kiq_ring); - if (r) { - DRM_ERROR("kfq enable failed\n"); - kiq_ring->sched.ready = false; - } - return r; + return amdgpu_ring_test_helper(kiq_ring); } static int mes_v11_0_queue_init(struct amdgpu_device *adev, @@ -894,7 +902,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, int r; if (pipe == AMDGPU_MES_KIQ_PIPE) - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) ring = &adev->mes.ring; else @@ -961,9 +969,9 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - spin_lock_init(&adev->gfx.kiq.ring_lock); + spin_lock_init(&adev->gfx.kiq[0].ring_lock); - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; ring->me = 3; ring->pipe = 1; @@ -989,7 +997,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; if (pipe == AMDGPU_MES_KIQ_PIPE) - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) ring = &adev->mes.ring; else @@ -999,6 +1007,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, return 0; r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { @@ -1074,15 +1083,15 @@ static int mes_v11_0_sw_fini(void *handle) amdgpu_ucode_release(&adev->mes.fw[pipe]); } - amdgpu_bo_free_kernel(&adev->gfx.kiq.ring.mqd_obj, - &adev->gfx.kiq.ring.mqd_gpu_addr, - &adev->gfx.kiq.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, &adev->mes.ring.mqd_gpu_addr, &adev->mes.ring.mqd_ptr); - amdgpu_ring_fini(&adev->gfx.kiq.ring); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); amdgpu_ring_fini(&adev->mes.ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1175,7 +1184,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev) mes_v11_0_enable(adev, true); - mes_v11_0_kiq_setting(&adev->gfx.kiq.ring); + mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring); r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE); if (r) @@ -1196,7 +1205,7 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev)) { - mes_v11_0_kiq_dequeue(&adev->gfx.kiq.ring); + mes_v11_0_kiq_dequeue(&adev->gfx.kiq[0].ring); mes_v11_0_kiq_clear(adev); } @@ -1244,7 +1253,7 @@ static int mes_v11_0_hw_init(void *handle) * MES uses KIQ ring exclusively so driver cannot access KIQ ring * with MES enabled. 
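
Both MES variants also drop the open-coded KIQ ring test (run the test, print an error, clear sched.ready, return) in favour of a single amdgpu_ring_test_helper() call. The helper's body is not part of this excerpt, so the sketch below only illustrates the assumed shape of such a wrapper: run the test once and keep the ready flag consistent with the result, so callers stop duplicating the same error handling.

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-ins; the real amdgpu structures are far richer. */
struct ring {
	const char *name;
	bool ready;
	int (*test)(struct ring *ring);
};

/* Assumed wrapper shape: test once, report failure, update readiness. */
static int ring_test_helper(struct ring *ring)
{
	int r = ring->test(ring);

	if (r)
		fprintf(stderr, "ring %s test failed (%d)\n", ring->name, r);
	ring->ready = !r;
	return r;
}

static int fake_test(struct ring *ring)
{
	(void)ring;
	return 0;	/* pretend the ring echoed the test packet */
}

int main(void)
{
	struct ring kiq = { .name = "kiq", .ready = false, .test = fake_test };

	return ring_test_helper(&kiq);
}
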
*/ - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; adev->mes.ring.sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 15e7cbeae75b..fb91b31056ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -229,7 +229,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -285,7 +285,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -338,7 +338,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -415,7 +415,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 73afbf2facc9..9086f2fdfaf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -261,7 +261,7 @@ static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -319,7 +319,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -348,7 +348,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev) { - 
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -425,7 +425,7 @@ static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_7_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 342d1702104c..5e8b493f8699 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -29,6 +29,7 @@ #include "soc15_common.h" #include "soc15.h" +#include "amdgpu_ras.h" #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 @@ -53,18 +54,30 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; - - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); + struct amdgpu_vmhub *hub; + u32 inst_mask; + int i; - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; + WREG32_SOC15_OFFSET(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); + } } static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) { uint64_t pt_base; + u32 inst_mask; + int i; if (adev->gmc.pdb0_bo) pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); @@ -76,187 +89,248 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - if (adev->gmc.pdb0_bo) { - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.fb_start >> 12)); - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.fb_start >> 44)); - - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->gmc.gart_end >> 44)); - - } else { - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->gmc.gart_start >> 12)); - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->gmc.gart_start >> 44)); - - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->gmc.gart_end >> 12)); - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->gmc.gart_end >> 44)); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.fb_start >> 12)); + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.fb_start >> 44)); + + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, i, + 
regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + + } else { + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); + } } } static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) { + uint32_t tmp, inst_mask; uint64_t value; - uint32_t tmp; + int i; - /* Program the AGP BAR */ - WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, + adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, + adev->gmc.agp_end >> 24); - /* Program the system aperture low logical page number. */ - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + if (amdgpu_sriov_vf(adev)) + return; - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - /* In the case squeezing vram into GART aperture, we don't use - * FB aperture and AGP aperture. Disable them. - */ - if (adev->gmc.pdb0_bo) { - WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_BOT, 0xFFFFFF); - WREG32_SOC15(MMHUB, 0, regMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, 0x00FFFFFF); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x3FFFFFFF); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); - } - if (amdgpu_sriov_vf(adev)) - return; + WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - /* Set default page address. */ - value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, - (u32)(value >> 12)); - WREG32_SOC15(MMHUB, 0, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, - (u32)(value >> 44)); - - /* Program "protection fault". */ - WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page_addr >> 12)); - WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page_addr >> 44)); - - tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); - WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL2, tmp); + /* In the case squeezing vram into GART aperture, we don't use + * FB aperture and AGP aperture. Disable them. 
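
The MMHUB 1.8 rework above follows one pattern throughout the file: every register sequence that used to target instance 0 is wrapped in for_each_inst(i, inst_mask), with inst_mask taken from adev->aid_mask, so the same programming is replayed on each AID that is actually present. The macro itself is defined in headers outside this hunk; a standalone sketch of iterating the set bits of such a mask looks like this:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative set-bit iterator.  The driver's for_each_inst() macro is
 * defined elsewhere; this is only the same idea written out by hand.
 */
#define for_each_set_bit_u32(bit, work, mask)				\
	for ((work) = (mask);						\
	     (work) && (((bit) = (uint32_t)__builtin_ctz((work))), 1);	\
	     (work) &= (work) - 1)

static void program_instance(uint32_t inst)
{
	printf("programming MMHUB instance %u\n", inst);
}

int main(void)
{
	uint32_t aid_mask = 0x5;	/* AIDs 0 and 2 present (example value) */
	uint32_t bit, work;

	for_each_set_bit_u32(bit, work, aid_mask)
		program_instance(bit);
	return 0;
}

Driving the loop from a mask rather than a count lets sparsely populated configurations skip absent instances without special cases.
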
+ */ + if (adev->gmc.pdb0_bo) { + WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, 0xFFFFFF); + WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_TOP, 0); + WREG32_SOC15(MMHUB, i, regMC_VM_FB_LOCATION_BASE, + 0x00FFFFFF); + WREG32_SOC15(MMHUB, i, + regMC_VM_SYSTEM_APERTURE_LOW_ADDR, + 0x3FFFFFFF); + WREG32_SOC15(MMHUB, i, + regMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0); + } + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, i, + regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, i, + regVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL2, tmp); + } } static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, inst_mask; + int i; /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); - - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 1); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - MTYPE, MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); - - WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp); + } } static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, inst_mask; + int i; if (amdgpu_sriov_vf(adev)) return; /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); - /* XXX for emulation, Refer to closed source code.*/ - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp); - - tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL2, tmp); - - tmp = regVM_L2_CNTL3_DEFAULT; - if (adev->gmc.translate_further) { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 9); - } else { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, - L2_CACHE_BIGK_FRAGMENT_SIZE, 6); - } - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, tmp); - - tmp = regVM_L2_CNTL4_DEFAULT; - if (adev->gmc.xgmi.connected_to_cpu) { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, 1); - } else { - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, 0); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, + 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, i, regVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, + 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, i, regVM_L2_CNTL2, tmp); + + tmp = regVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, i, regVM_L2_CNTL3, tmp); + + tmp = regVM_L2_CNTL4_DEFAULT; + /* For AMD APP APUs setup WC memory */ + if (adev->gmc.xgmi.connected_to_cpu || adev->gmc.is_app_apu) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 1); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 
0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + } + WREG32_SOC15(MMHUB, i, regVM_L2_CNTL4, tmp); } - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL4, tmp); } static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev) { - uint32_t tmp; - - tmp = RREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, - adev->gmc.vmid0_page_table_depth); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->gmc.vmid0_page_table_block_size); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); - WREG32_SOC15(MMHUB, 0, regVM_CONTEXT0_CNTL, tmp); + uint32_t tmp, inst_mask; + int i; + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, + adev->gmc.vmid0_page_table_depth); + tmp = REG_SET_FIELD(tmp, + VM_CONTEXT0_CNTL, PAGE_TABLE_BLOCK_SIZE, + adev->gmc.vmid0_page_table_block_size); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL, tmp); + } } static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev) { + u32 inst_mask; + int i; + if (amdgpu_sriov_vf(adev)) return; - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0xFFFFFFFF); - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, 0x0000000F); - - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); - - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); - WREG32_SOC15(MMHUB, 0, regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(MMHUB, i, + regVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + } } static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; - unsigned num_level, block_size; - uint32_t tmp; - int i; + struct amdgpu_vmhub *hub; + unsigned int num_level, block_size; + uint32_t tmp, inst_mask; + int i, j; num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; @@ -265,60 +339,75 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - num_level); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, - 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - 
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - block_size); - /* On Aldebaran, XNACK can be enabled in the SQ per-process. - * Retry faults need to be enabled for that to work. - */ - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - 1); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, - i * hub->ctx_distance, tmp); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, - i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, - i * hub->ctx_addr_distance, 0); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, - i * hub->ctx_addr_distance, - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, - i * hub->ctx_addr_distance, - upper_32_bits(adev->vm_manager.max_pfn - 1)); + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL, + i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, num_level); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + block_size); + /* On 9.4.3, XNACK can be enabled in the SQ + * per-process. Retry faults need to be enabled for + * that to work. 
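
Within each instance, the per-VMID loop above still leans on the hub's precomputed register strides: context N is reached by adding N times ctx_distance (or ctx_addr_distance for the page-table address pair) to the context-1 base offset. A compact sketch of that offset arithmetic, with invented register numbers standing in for the real SOC15 offsets:

#include <stdint.h>
#include <stdio.h>

/* Invented register numbers; the real values come from the asic headers. */
#define REG_VM_CONTEXT0_CNTL		0x0500u
#define REG_VM_CONTEXT1_CNTL		0x0502u
#define REG_VM_CONTEXT0_PT_BASE_LO32	0x0540u
#define REG_VM_CONTEXT1_PT_BASE_LO32	0x0542u

struct vmhub {
	uint32_t ctx_distance;		/* stride between CONTEXTn_CNTL regs */
	uint32_t ctx_addr_distance;	/* stride between per-context addr regs */
};

static void wreg(uint32_t reg, uint32_t val)
{
	printf("WREG 0x%04x <- 0x%08x\n", (unsigned int)reg, (unsigned int)val);
}

int main(void)
{
	struct vmhub hub = {
		.ctx_distance = REG_VM_CONTEXT1_CNTL - REG_VM_CONTEXT0_CNTL,
		.ctx_addr_distance =
			REG_VM_CONTEXT1_PT_BASE_LO32 - REG_VM_CONTEXT0_PT_BASE_LO32,
	};
	uint32_t i;

	/* Program contexts 1..15 relative to the context-1 base registers. */
	for (i = 0; i <= 14; i++) {
		wreg(REG_VM_CONTEXT1_CNTL + i * hub.ctx_distance, 0x1);
		wreg(REG_VM_CONTEXT1_PT_BASE_LO32 + i * hub.ctx_addr_distance, 0x0);
	}
	return 0;
}
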
+ */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); + WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } } } static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; - unsigned i; - - for (i = 0; i < 18; ++i) { - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, - i * hub->eng_addr_distance, 0xffffffff); - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, - i * hub->eng_addr_distance, 0x1f); + struct amdgpu_vmhub *hub; + u32 i, j, inst_mask; + + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, j, + regVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } } } @@ -352,28 +441,34 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub; u32 tmp; - u32 i; + u32 i, j, inst_mask; /* Disable all tables */ - for (i = 0; i < 16; i++) - WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL, - i * hub->ctx_distance, 0); - - /* Setup TLB control */ - tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); - - if (!amdgpu_sriov_vf(adev)) { - /* Setup L2 cache */ - tmp = RREG32_SOC15(MMHUB, 0, regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL, tmp); - WREG32_SOC15(MMHUB, 0, regVM_L2_CNTL3, 0); + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, + 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp); + + if (!amdgpu_sriov_vf(adev)) { + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, + 0); + WREG32_SOC15(MMHUB, j, regVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0); + } } } @@ -385,73 +480,83 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) */ static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value) { - u32 tmp; + u32 tmp, inst_mask; + int i; if (amdgpu_sriov_vf(adev)) return; - tmp = RREG32_SOC15(MMHUB, 0, 
regVM_L2_PROTECTION_FAULT_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, - value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - if (!value) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - CRASH_ON_NO_RETRY_FAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, - CRASH_ON_RETRY_FAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + + WREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL, tmp); } - - WREG32_SOC15(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL, tmp); } static void mmhub_v1_8_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; - - hub->ctx0_ptb_addr_lo32 = - SOC15_REG_OFFSET(MMHUB, 0, - regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); - hub->ctx0_ptb_addr_hi32 = - SOC15_REG_OFFSET(MMHUB, 0, - regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); - hub->vm_inv_eng0_req = - SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_REQ); - hub->vm_inv_eng0_ack = - SOC15_REG_OFFSET(MMHUB, 0, regVM_INVALIDATE_ENG0_ACK); - hub->vm_context0_cntl = - SOC15_REG_OFFSET(MMHUB, 0, regVM_CONTEXT0_CNTL); - hub->vm_l2_pro_fault_status = - SOC15_REG_OFFSET(MMHUB, 0, 
regVM_L2_PROTECTION_FAULT_STATUS); - hub->vm_l2_pro_fault_cntl = - SOC15_REG_OFFSET(MMHUB, 0, regVM_L2_PROTECTION_FAULT_CNTL); - - hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL; - hub->ctx_addr_distance = regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - - regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - regVM_INVALIDATE_ENG0_REQ; - hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - - regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + struct amdgpu_vmhub *hub; + u32 inst_mask; + int i; + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; + + hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, i, + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, i, regVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, i, regVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = SOC15_REG_OFFSET(MMHUB, i, + regVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, i, + regVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regVM_CONTEXT1_CNTL - regVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = + regVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regVM_INVALIDATE_ENG1_REQ - + regVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + } } static int mmhub_v1_8_set_clockgating(struct amdgpu_device *adev, @@ -475,3 +580,277 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, }; + +static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_CE_ERR_STATUS_LO, regMMEA0_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_CE_ERR_STATUS_LO, regMMEA1_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_CE_ERR_STATUS_LO, regMMEA2_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_CE_ERR_STATUS_LO, regMMEA3_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_CE_ERR_STATUS_LO, regMMEA4_CE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_CE_ERR_STATUS_LO, regMM_CANE_CE_ERR_STATUS_HI), + 1, 0, "MM_CANE"}, +}; + +static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ue_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA0_UE_ERR_STATUS_LO, regMMEA0_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA0"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA1_UE_ERR_STATUS_LO, regMMEA1_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA1"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA2_UE_ERR_STATUS_LO, regMMEA2_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA2"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA3_UE_ERR_STATUS_LO, 
regMMEA3_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA3"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMMEA4_UE_ERR_STATUS_LO, regMMEA4_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "MMEA4"}, + {AMDGPU_RAS_REG_ENTRY(MMHUB, 0, regMM_CANE_UE_ERR_STATUS_LO, regMM_CANE_UE_ERR_STATUS_HI), + 1, 0, "MM_CANE"}, +}; + +static const struct amdgpu_ras_memory_id_entry mmhub_v1_8_ras_memory_list[] = { + {AMDGPU_MMHUB_WGMI_PAGEMEM, "MMEA_WGMI_PAGEMEM"}, + {AMDGPU_MMHUB_RGMI_PAGEMEM, "MMEA_RGMI_PAGEMEM"}, + {AMDGPU_MMHUB_WDRAM_PAGEMEM, "MMEA_WDRAM_PAGEMEM"}, + {AMDGPU_MMHUB_RDRAM_PAGEMEM, "MMEA_RDRAM_PAGEMEM"}, + {AMDGPU_MMHUB_WIO_CMDMEM, "MMEA_WIO_CMDMEM"}, + {AMDGPU_MMHUB_RIO_CMDMEM, "MMEA_RIO_CMDMEM"}, + {AMDGPU_MMHUB_WGMI_CMDMEM, "MMEA_WGMI_CMDMEM"}, + {AMDGPU_MMHUB_RGMI_CMDMEM, "MMEA_RGMI_CMDMEM"}, + {AMDGPU_MMHUB_WDRAM_CMDMEM, "MMEA_WDRAM_CMDMEM"}, + {AMDGPU_MMHUB_RDRAM_CMDMEM, "MMEA_RDRAM_CMDMEM"}, + {AMDGPU_MMHUB_MAM_DMEM0, "MMEA_MAM_DMEM0"}, + {AMDGPU_MMHUB_MAM_DMEM1, "MMEA_MAM_DMEM1"}, + {AMDGPU_MMHUB_MAM_DMEM2, "MMEA_MAM_DMEM2"}, + {AMDGPU_MMHUB_MAM_DMEM3, "MMEA_MAM_DMEM3"}, + {AMDGPU_MMHUB_WRET_TAGMEM, "MMEA_WRET_TAGMEM"}, + {AMDGPU_MMHUB_RRET_TAGMEM, "MMEA_RRET_TAGMEM"}, + {AMDGPU_MMHUB_WIO_DATAMEM, "MMEA_WIO_DATAMEM"}, + {AMDGPU_MMHUB_WGMI_DATAMEM, "MMEA_WGMI_DATAMEM"}, + {AMDGPU_MMHUB_WDRAM_DATAMEM, "MMEA_WDRAM_DATAMEM"}, +}; + +static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, + uint32_t mmhub_inst, + void *ras_err_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + + amdgpu_ras_inst_query_ras_error_count(adev, + mmhub_v1_8_ce_reg_list, + ARRAY_SIZE(mmhub_v1_8_ce_reg_list), + mmhub_v1_8_ras_memory_list, + ARRAY_SIZE(mmhub_v1_8_ras_memory_list), + mmhub_inst, + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, + &err_data->ce_count); + amdgpu_ras_inst_query_ras_error_count(adev, + mmhub_v1_8_ue_reg_list, + ARRAY_SIZE(mmhub_v1_8_ue_reg_list), + mmhub_v1_8_ras_memory_list, + ARRAY_SIZE(mmhub_v1_8_ras_memory_list), + mmhub_inst, + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &err_data->ue_count); +} + +static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, + void *ras_err_status) +{ + uint32_t inst_mask; + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { + dev_warn(adev->dev, "MMHUB RAS is not supported\n"); + return; + } + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) + mmhub_v1_8_inst_query_ras_error_count(adev, i, ras_err_status); +} + +static void mmhub_v1_8_inst_reset_ras_error_count(struct amdgpu_device *adev, + uint32_t mmhub_inst) +{ + amdgpu_ras_inst_reset_ras_error_count(adev, + mmhub_v1_8_ce_reg_list, + ARRAY_SIZE(mmhub_v1_8_ce_reg_list), + mmhub_inst); + amdgpu_ras_inst_reset_ras_error_count(adev, + mmhub_v1_8_ue_reg_list, + ARRAY_SIZE(mmhub_v1_8_ue_reg_list), + mmhub_inst); +} + +static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t inst_mask; + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { + dev_warn(adev->dev, "MMHUB RAS is not supported\n"); + return; + } + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) + mmhub_v1_8_inst_reset_ras_error_count(adev, i); +} + +static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = { + regMMEA0_ERR_STATUS, + regMMEA1_ERR_STATUS, + regMMEA2_ERR_STATUS, + regMMEA3_ERR_STATUS, + regMMEA4_ERR_STATUS, +}; + +static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev, 
+ uint32_t mmhub_inst) +{ + uint32_t reg_value; + uint32_t mmea_err_status_addr_dist; + uint32_t i; + + /* query mmea ras err status */ + mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; + for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { + reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_ERR_STATUS, + i * mmea_err_status_addr_dist); + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { + dev_warn(adev->dev, + "Detected MMEA%d err in MMHUB%d, status: 0x%x\n", + i, mmhub_inst, reg_value); + } + } + + /* query mm_cane ras err status */ + reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); + if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) || + REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) || + REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) { + dev_warn(adev->dev, + "Detected MM CANE err in MMHUB%d, status: 0x%x\n", + mmhub_inst, reg_value); + } +} + +static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) +{ + uint32_t inst_mask; + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { + dev_warn(adev->dev, "MMHUB RAS is not supported\n"); + return; + } + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) + mmhub_v1_8_inst_query_ras_err_status(adev, i); +} + +static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev, + uint32_t mmhub_inst) +{ + uint32_t mmea_cgtt_clk_cntl_addr_dist; + uint32_t mmea_err_status_addr_dist; + uint32_t reg_value; + uint32_t i; + + /* reset mmea ras err status */ + mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL; + mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; + for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { + /* force clk branch on for response path + * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 + */ + reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_CGTT_CLK_CTRL, + i * mmea_cgtt_clk_cntl_addr_dist); + reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, + SOFT_OVERRIDE_RETURN, 1); + WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_CGTT_CLK_CTRL, + i * mmea_cgtt_clk_cntl_addr_dist, + reg_value); + + /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ + reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_ERR_STATUS, + i * mmea_err_status_addr_dist); + reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, + CLEAR_ERROR_STATUS, 1); + WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_ERR_STATUS, + i * mmea_err_status_addr_dist, + reg_value); + + /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */ + reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_CGTT_CLK_CTRL, + i * mmea_cgtt_clk_cntl_addr_dist); + reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, + SOFT_OVERRIDE_RETURN, 0); + WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, + regMMEA0_CGTT_CLK_CTRL, + i * mmea_cgtt_clk_cntl_addr_dist, + reg_value); + } + + /* reset mm_cane ras err status + * force clk branch on for response path + * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 + */ + reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); + reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, + SOFT_OVERRIDE_ATRET, 1); + WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); + + /* set 
MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ + reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); + reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, + CLEAR_ERROR_STATUS, 1); + WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value); + + /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ + reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); + reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, + SOFT_OVERRIDE_ATRET, 0); + WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); +} + +static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) +{ + uint32_t inst_mask; + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { + dev_warn(adev->dev, "MMHUB RAS is not supported\n"); + return; + } + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) + mmhub_v1_8_inst_reset_ras_err_status(adev, i); +} + +static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { + .query_ras_error_count = mmhub_v1_8_query_ras_error_count, + .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, + .query_ras_error_status = mmhub_v1_8_query_ras_error_status, + .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, +}; + +struct amdgpu_mmhub_ras mmhub_v1_8_ras = { + .ras_block = { + .hw_ops = &mmhub_v1_8_ras_hw_ops, + }, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h index 0bb36200e4e5..126f0075ac50 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h @@ -24,5 +24,6 @@ #define __MMHUB_V1_8_H__ extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs; +extern struct amdgpu_mmhub_ras mmhub_v1_8_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 278e32db878d..8f76c6ecf50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -187,7 +187,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -362,7 +362,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -412,7 +412,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -441,7 +441,7 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -520,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = { static void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index fcf2813e70db..8bd0fc8d9d25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -121,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -280,7 +280,7 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -330,7 +330,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -373,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -446,7 +446,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = { static void mmhub_v2_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 17a792616979..441379e91cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -136,7 +136,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -319,7 +319,7 @@ static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -369,7 +369,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -398,7 +398,7 @@ static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -477,7 +477,7 @@ static const struct 
amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = { static void mmhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 26509b6b8c24..12c7f4b46ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -138,7 +138,7 @@ static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -306,7 +306,7 @@ static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -356,7 +356,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -385,7 +385,7 @@ static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -459,7 +459,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = { static void mmhub_v3_0_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 26abbc6a47ab..5dadc85abf7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -129,7 +129,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -311,7 +311,7 @@ static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -361,7 +361,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -390,7 +390,7 @@ static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_2_gart_disable(struct 
amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -469,7 +469,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = { static void mmhub_v3_0_2_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 72083e96222f..e790f890aec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -294,7 +294,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned int num_level, block_size; uint32_t tmp; int i; @@ -363,7 +363,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -404,7 +404,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i, j; @@ -507,8 +507,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v9_4_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = - {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = { + &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]}; int i; for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h index 3e4e858a6965..a773ef61b78c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v3_0.h @@ -30,6 +30,8 @@ #define MMSCH_VERSION_MINOR 0 #define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) +#define MMSCH_V3_0_VCN_INSTANCES 0x2 + enum mmsch_v3_0_command_type { MMSCH_COMMAND__DIRECT_REG_WRITE = 0, MMSCH_COMMAND__DIRECT_REG_POLLING = 2, @@ -47,7 +49,7 @@ struct mmsch_v3_0_table_info { struct mmsch_v3_0_init_header { uint32_t version; uint32_t total_size; - struct mmsch_v3_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; + struct mmsch_v3_0_table_info inst[MMSCH_V3_0_VCN_INSTANCES]; }; struct mmsch_v3_0_cmd_direct_reg_header { diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index 83653a50a1a2..796d4f8791e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ 
-43,6 +43,8 @@ #define MMSCH_VF_MAILBOX_RESP__OK 0x1 #define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_V4_0_VCN_INSTANCES 0x2 + enum mmsch_v4_0_command_type { MMSCH_COMMAND__DIRECT_REG_WRITE = 0, MMSCH_COMMAND__DIRECT_REG_POLLING = 2, @@ -60,7 +62,7 @@ struct mmsch_v4_0_table_info { struct mmsch_v4_0_init_header { uint32_t version; uint32_t total_size; - struct mmsch_v4_0_table_info inst[AMDGPU_MAX_VCN_INSTANCES]; + struct mmsch_v4_0_table_info inst[MMSCH_V4_0_VCN_INSTANCES]; struct mmsch_v4_0_table_info jpegdec; }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 24d12075ca3a..cd1a02d30420 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -30,6 +30,8 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> +#define NPS_MODE_MASK 0x000000FFL + static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -66,6 +68,13 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { u32 doorbell_range = 0, doorbell_ctrl = 0; + int aid_id, dev_inst; + + dev_inst = GET_INST(SDMA0, instance); + aid_id = adev->sdma.instance[instance].aid_id; + + if (use_doorbell == false) + return; doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -80,9 +89,10 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_RANGE_SIZE, doorbell_size); - switch (instance) { + switch (dev_inst % adev->sdma.num_inst_per_aid) { case 0: - WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1, doorbell_range); + WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_1, + 4 * aid_id, doorbell_range); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -94,10 +104,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x1); - WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_1_CTRL, doorbell_ctrl); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_1_CTRL, + aid_id, doorbell_ctrl); break; case 1: - WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2, doorbell_range); + WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_2, + 4 * aid_id, doorbell_range); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -109,10 +121,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x2); - WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_2_CTRL, doorbell_ctrl); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_2_CTRL, + aid_id, doorbell_ctrl); break; case 2: - WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3, doorbell_range); + WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_3, + 4 * aid_id, doorbell_range); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -124,10 +138,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x8); - WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_5_CTRL, doorbell_ctrl); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_5_CTRL, + aid_id, doorbell_ctrl); break; case 3: - WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4, doorbell_range); + 
WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_4, + 4 * aid_id, doorbell_range); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -139,11 +155,12 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x9); - WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_6_CTRL, doorbell_ctrl); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_6_CTRL, + aid_id, doorbell_ctrl); break; default: break; - }; + } return; } @@ -152,6 +169,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do int doorbell_index, int instance) { u32 doorbell_range = 0, doorbell_ctrl = 0; + u32 aid_id = instance; if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, @@ -161,7 +179,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x8); + 0x9); + if (aid_id) + doorbell_range = REG_SET_FIELD(doorbell_range, + DOORBELL0_CTRL_ENTRY_0, + DOORBELL0_FENCE_ENABLE_ENTRY, + 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -174,10 +197,15 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8); + S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); + + WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17, + aid_id, doorbell_range); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL, + aid_id, doorbell_ctrl); } else { doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, @@ -185,10 +213,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_RANGE_SIZE, 0); - } - WREG32_SOC15(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17, doorbell_range); - WREG32_SOC15(NBIO, 0, regS2A_DOORBELL_ENTRY_4_CTRL, doorbell_ctrl); + WREG32_SOC15_OFFSET(NBIO, 0, regDOORBELL0_CTRL_ENTRY_17, + aid_id, doorbell_range); + WREG32_SOC15_EXT(NBIO, aid_id, regS2A_DOORBELL_ENTRY_4_CTRL, + aid_id, doorbell_ctrl); + } } static void nbio_v7_9_enable_doorbell_aperture(struct amdgpu_device *adev, @@ -235,7 +265,7 @@ static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev, ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x4); + 0x8); ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -248,7 +278,7 @@ static void nbio_v7_9_ih_doorbell_range(struct amdgpu_device *adev, S2A_DOORBELL_PORT1_RANGE_OFFSET, 0); ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x4); + S2A_DOORBELL_PORT1_RANGE_SIZE, 0x8); ih_doorbell_ctrl = REG_SET_FIELD(ih_doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0); @@ -319,6 +349,11 @@ static u32 nbio_v7_9_get_pcie_data_offset(struct amdgpu_device *adev) return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2); } +static u32 nbio_v7_9_get_pcie_index_hi_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2_HI); +} + const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg = { 
.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, @@ -347,11 +382,57 @@ static void nbio_v7_9_enable_doorbell_interrupt(struct amdgpu_device *adev, DOORBELL_INTERRUPT_DISABLE, enable ? 0 : 1); } +static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev) +{ + u32 tmp, px; + + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS); + px = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_COMPUTE_STATUS, + PARTITION_MODE); + + return px; +} + +static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev, + u32 *supp_modes) +{ + u32 tmp; + + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS); + tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE); + + if (supp_modes) { + *supp_modes = + RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP); + } + + return ffs(tmp); +} + +static void nbio_v7_9_init_registers(struct amdgpu_device *adev) +{ + u32 inst_mask; + int i; + + WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, + 0xff & ~(adev->gfx.xcc_mask)); + + WREG32_SOC15(NBIO, 0, regBIFC_GFX_INT_MONITOR_MASK, 0x7ff); + + inst_mask = adev->aid_mask & ~1U; + for_each_inst(i, inst_mask) { + WREG32_SOC15_EXT(NBIO, i, regXCC_DOORBELL_FENCE, i, + XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK); + + } +} + const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, .get_pcie_index_offset = nbio_v7_9_get_pcie_index_offset, .get_pcie_data_offset = nbio_v7_9_get_pcie_data_offset, + .get_pcie_index_hi_offset = nbio_v7_9_get_pcie_index_hi_offset, .get_rev_id = nbio_v7_9_get_rev_id, .mc_access_enable = nbio_v7_9_mc_access_enable, .get_memsize = nbio_v7_9_get_memsize, @@ -366,4 +447,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_clockgating_state = nbio_v7_9_get_clockgating_state, .ih_control = nbio_v7_9_ih_control, .remap_hdp_registers = nbio_v7_9_remap_hdp_registers, + .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, + .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, + .init_registers = nbio_v7_9_init_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 0fb6013441f0..51523b27a186 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -341,11 +341,6 @@ void nv_grbm_select(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } -static void nv_vga_set_state(struct amdgpu_device *adev, bool state) -{ - /* todo */ -} - static bool nv_read_disabled_bios(struct amdgpu_device *adev) { /* todo */ @@ -381,12 +376,12 @@ static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } @@ -632,9 +627,9 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { if (enter) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); else - amdgpu_gfx_rlc_exit_safe_mode(adev); + 
amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); @@ -654,7 +649,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .read_register = &nv_read_register, .reset = &nv_asic_reset, .reset_method = &nv_asic_reset_method, - .set_vga_state = &nv_vga_set_state, .get_xclk = &nv_get_xclk, .set_uvd_clocks = &nv_set_uvd_clocks, .set_vce_clocks = &nv_set_vce_clocks, diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h index fd6b58243b03..631dafb92299 100644 --- a/drivers/gpu/drm/amd/amdgpu/nvd.h +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -462,6 +462,9 @@ # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) #define PACKET3_RUN_LIST 0xA5 #define PACKET3_MAP_PROCESS_VM 0xA6 - +/* GFX11 */ +#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 +# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) +# define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM (1 << 0) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 22c775f39119..18917df785ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -102,6 +102,7 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */ + GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */ }; /* PSP boot config sub-commands */ @@ -338,6 +339,13 @@ struct psp_gfx_cmd_boot_cfg uint32_t boot_config_valid; /* dynamic boot configuration valid bits bitmask */ }; +struct psp_gfx_cmd_sriov_spatial_part { + uint32_t mode; + uint32_t override_ips; + uint32_t override_xcds_avail; + uint32_t override_this_aid; +}; + /* All GFX ring buffer commands. 
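A quick aside on the new nbio_v7_9_get_memory_partition_mode() helper a few hunks above: it reads the NPS_MODE field of BIF_BX_PF0_PARTITION_MEM_STATUS and converts the one-hot bit it finds into a small mode number with ffs(). The standalone C sketch below replays that decode; the NPS_MODE_MASK value mirrors the define added at the top of nbio_v7_9.c, while the shift and the sample status values are invented for the sketch.

#include <assert.h>
#include <stdio.h>

#define NPS_MODE_MASK  0x000000FFu /* mirrors the define added in nbio_v7_9.c */
#define NPS_MODE_SHIFT 0           /* placeholder shift, assumed for the sketch */

static unsigned int decode_nps_mode(unsigned int status)
{
	unsigned int field = (status & NPS_MODE_MASK) >> NPS_MODE_SHIFT;

	/* ffs() semantics: 1 for bit 0, 2 for bit 1, ..., 0 when no bit is set */
	return (unsigned int)__builtin_ffs((int)field);
}

int main(void)
{
	assert(decode_nps_mode(0x01) == 1); /* lowest partition mode reported */
	assert(decode_nps_mode(0x08) == 4); /* a higher one-hot mode bit */
	assert(decode_nps_mode(0x00) == 0); /* nothing reported */
	printf("mode for status 0x08: %u\n", decode_nps_mode(0x08));
	return 0;
}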
*/ union psp_gfx_commands { @@ -351,6 +359,7 @@ union psp_gfx_commands struct psp_gfx_cmd_setup_tmr cmd_setup_vmr; struct psp_gfx_cmd_load_toc cmd_load_toc; struct psp_gfx_cmd_boot_cfg boot_cfg; + struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part; }; struct psp_gfx_uresp_reserved diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index caee76ab7110..f9cb0d2c89d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -624,10 +624,11 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1); if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) - return 0; - - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h index b2414a729ca1..de5677ce4330 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.h @@ -25,6 +25,8 @@ #include "amdgpu_psp.h" +#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */ + void psp_v13_0_set_psp_funcs(struct psp_context *psp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index fd2a7b66ac56..51afc92994a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -466,8 +466,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) #endif /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - - ring->sched.ready = true; } sdma_v2_4_enable(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e572389089d2..344202870aeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -734,8 +734,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) #endif /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - - ring->sched.ready = true; } /* unhalt the MEs */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9295ac7edd56..cd37f45e01a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1114,8 +1114,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } /** @@ -1202,8 +1200,6 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } static void @@ -1825,12 +1821,12 @@ static int sdma_v4_0_sw_init(void *handle) /* * On Arcturus, SDMA instance 5~7 has a different vmhub - * type(AMDGPU_MMHUB_1). + * type(AMDGPU_MMHUB1). 
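The mmhub and sdma hunks above and below systematically replace the fixed hub ids AMDGPU_MMHUB_0, AMDGPU_MMHUB_1 and AMDGPU_GFXHUB_0 with the indexed forms AMDGPU_MMHUB0(x), AMDGPU_MMHUB1(x) and AMDGPU_GFXHUB(x), so one hub of each type can exist per AID. The sketch below shows one possible way to flatten such a (type, instance) pair into a single vmhub array index; the SKETCH_MAX_* limits and the ordering are assumptions made for illustration, not the driver's actual encoding.

#include <assert.h>

/* assumed limits and ordering, not the driver's real values */
#define SKETCH_MAX_GFXHUB 8
#define SKETCH_MAX_MMHUB0 8

#define SKETCH_GFXHUB(x)  (x)
#define SKETCH_MMHUB0(x)  (SKETCH_MAX_GFXHUB + (x))
#define SKETCH_MMHUB1(x)  (SKETCH_MAX_GFXHUB + SKETCH_MAX_MMHUB0 + (x))

int main(void)
{
	/* single-hub ASICs keep using instance 0 of each type */
	assert(SKETCH_GFXHUB(0) == 0);
	assert(SKETCH_MMHUB0(0) == 8);
	assert(SKETCH_MMHUB1(0) == 16);

	/* multi-AID parts can address per-AID hubs without new constants */
	assert(SKETCH_MMHUB0(3) == 11);
	return 0;
}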
*/ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1847,13 +1843,23 @@ static int sdma_v4_0_sw_init(void *handle) /* paging queue use same doorbell index/routing as gfx queue * with 0x400 (4096 dwords) offset on second doorbell page */ - ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->doorbell_index += 0x400; + if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) && + adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) { + ring->doorbell_index = + adev->doorbell_index.sdma_engine[i] << 1; + ring->doorbell_index += 0x400; + } else { + /* From vega20, the sdma_doorbell_range in 1st + * doorbell page is reserved for page queue. + */ + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] + 1) << 1; + } if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -2306,7 +2312,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, @@ -2338,7 +2344,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c index 6f9895cdddb1..0ddb6955a6d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c @@ -141,6 +141,10 @@ static const struct soc15_ras_field_entry sdma_v4_4_ras_fields[] = { SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_RDBST_FIFO_SED), 0, 0, }, + { "SDMA_UTCL1_WR_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2), + SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_UTCL1_WR_FIFO_SED), + 0, 0, + }, { "SDMA_DATA_LUT_FIFO_SED", SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_EDC_COUNTER2), SOC15_REG_FIELD(SDMA0_EDC_COUNTER2, SDMA_DATA_LUT_FIFO_SED), 0, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 64dcaa2670dd..ea5e12390d18 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -27,6 +27,7 @@ #include <linux/pci.h> #include "amdgpu.h" +#include "amdgpu_xcp.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -53,11 +54,14 @@ static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); +static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { - return (adev->reg_offset[SDMA0_HWIP][instance][0] + offset); + u32 dev_inst = 
GET_INST(SDMA0, instance); + + return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset); } static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num) @@ -92,13 +96,25 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id) } } -static void sdma_v4_4_2_init_golden_registers(struct amdgpu_device *adev) +static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev, + uint32_t inst_mask) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { - case IP_VERSION(4, 4, 2): - break; - default: - break; + u32 val; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG); + val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4); + val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, + PIPE_INTERLEAVE_SIZE, 0); + WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val); + + val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ); + val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS, + 4); + val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, + PIPE_INTERLEAVE_SIZE, 0); + WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val); } } @@ -399,19 +415,21 @@ static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 /** - * sdma_v4_4_2_gfx_stop - stop the gfx async dma engines + * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be disabled * * Stop the gfx async dma ring buffers. */ -static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev) +static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, + uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i, unset = 0; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].ring; if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { @@ -429,32 +447,36 @@ static void sdma_v4_4_2_gfx_stop(struct amdgpu_device *adev) } /** - * sdma_v4_4_2_rlc_stop - stop the compute async dma engines + * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be disabled * * Stop the compute async dma queues. */ -static void sdma_v4_4_2_rlc_stop(struct amdgpu_device *adev) +static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev, + uint32_t inst_mask) { /* XXX todo */ } /** - * sdma_v4_4_2_page_stop - stop the page async dma engines + * sdma_v4_4_2_inst_page_stop - stop the page async dma engines * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be disabled * * Stop the page async dma ring buffers. */ -static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev) +static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev, + uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; bool unset = false; - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].page; if ((adev->mman.buffer_funcs_ring == sdma[i]) && @@ -475,14 +497,16 @@ static void sdma_v4_4_2_page_stop(struct amdgpu_device *adev) } /** - * sdma_v4_4_2_ctx_switch_enable - stop the async dma engines context switch + * sdma_v4_4_2_inst_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer * @enable: enable/disable the DMA MEs context switch. + * @inst_mask: mask of dma engine instances to be enabled * * Halt or unhalt the async dma engines context switch. 
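The sdma_v4_0_sw_init() hunk a little above switches how the paging queue's doorbell index is derived: pre-Vega20 parts keep the old scheme of (gfx index << 1) plus a 0x400 dword offset into the second doorbell page, while Vega20 and later simply take the next slot, (gfx index + 1) << 1, since the first page's sdma_doorbell_range already reserves room for the page queue. The small program below replays both formulas; the sample engine index is made up.

#include <assert.h>
#include <stdio.h>

static unsigned int page_doorbell_legacy(unsigned int gfx_index)
{
	return (gfx_index << 1) + 0x400; /* 0x400 dwords into the second page */
}

static unsigned int page_doorbell_vega20(unsigned int gfx_index)
{
	return (gfx_index + 1) << 1;     /* slot right after the gfx queue */
}

int main(void)
{
	unsigned int gfx_index = 0x10;   /* hypothetical sdma_engine[i] value */

	assert((gfx_index << 1) == 0x20);            /* gfx queue doorbell */
	assert(page_doorbell_legacy(gfx_index) == 0x420);
	assert(page_doorbell_vega20(gfx_index) == 0x22);
	printf("gfx 0x%x, legacy page 0x%x, vega20+ page 0x%x\n",
	       gfx_index << 1, page_doorbell_legacy(gfx_index),
	       page_doorbell_vega20(gfx_index));
	return 0;
}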
*/ -static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev, + bool enable, uint32_t inst_mask) { u32 f32_cntl, phase_quantum = 0; int i; @@ -511,7 +535,7 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl unit << SDMA_PHASE0_QUANTUM__UNIT__SHIFT; } - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { f32_cntl = RREG32_SDMA(i, regSDMA_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); @@ -525,30 +549,36 @@ static void sdma_v4_4_2_ctx_switch_enable(struct amdgpu_device *adev, bool enabl /* Extend page fault timeout to avoid interrupt storm */ WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080); } - } /** - * sdma_v4_4_2_enable - stop the async dma engines + * sdma_v4_4_2_inst_enable - stop the async dma engines * * @adev: amdgpu_device pointer * @enable: enable/disable the DMA MEs. + * @inst_mask: mask of dma engine instances to be enabled * * Halt or unhalt the async dma engines. */ -static void sdma_v4_4_2_enable(struct amdgpu_device *adev, bool enable) +static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable, + uint32_t inst_mask) { u32 f32_cntl; int i; if (!enable) { - sdma_v4_4_2_gfx_stop(adev); - sdma_v4_4_2_rlc_stop(adev); + sdma_v4_4_2_inst_gfx_stop(adev, inst_mask); + sdma_v4_4_2_inst_rlc_stop(adev, inst_mask); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_stop(adev); + sdma_v4_4_2_inst_page_stop(adev, inst_mask); + + /* SDMA FW needs to respond to FREEZE requests during reset. + * Keep it running during reset */ + if (!amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) + return; } - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1); WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl); @@ -659,8 +689,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } /** @@ -750,8 +778,6 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) #endif /* enable DMA IBs */ WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl); - - ring->sched.ready = true; } static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev) @@ -760,14 +786,16 @@ static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev) } /** - * sdma_v4_4_2_rlc_resume - setup and start the async dma engines + * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled * * Set up the compute DMA queues and enable them. * Returns 0 for success, error for failure. */ -static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev) +static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev, + uint32_t inst_mask) { sdma_v4_4_2_init_pg(adev); @@ -775,14 +803,16 @@ static int sdma_v4_4_2_rlc_resume(struct amdgpu_device *adev) } /** - * sdma_v4_4_2_load_microcode - load the sDMA ME ucode + * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled * * Loads the sDMA0/1 ucode. * Returns 0 for success, -EINVAL if the ucode is not available. 
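Most of the sdma_v4_4_2 helpers in this patch now take an inst_mask and walk it with for_each_inst(), while callers such as sdma_v4_4_2_hw_init() build the "all instances" mask with GENMASK(adev->sdma.num_instances - 1, 0). The sketch below shows the same contiguous-mask-plus-iterate idiom in plain C; the macro here is a simplified stand-in, not the kernel's GENMASK() or for_each_inst() definition.

#include <assert.h>
#include <stdio.h>

/* contiguous bit range [lo, hi], like the kernel's GENMASK() (simplified) */
#define SKETCH_GENMASK(hi, lo) \
	(((~0u) >> (31 - (hi))) & ~((1u << (lo)) - 1u))

int main(void)
{
	unsigned int num_instances = 4;
	unsigned int inst_mask = SKETCH_GENMASK(num_instances - 1, 0);
	unsigned int i, visited = 0;

	assert(inst_mask == 0xf);

	/* visit each set bit, lowest first, like for_each_inst() */
	for (unsigned int m = inst_mask; m; m &= m - 1) {
		i = (unsigned int)__builtin_ctz(m);
		visited++;
		printf("programming instance %u\n", i);
	}
	assert(visited == num_instances);
	return 0;
}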
*/ -static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev) +static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, + uint32_t inst_mask) { const struct sdma_firmware_header_v1_0 *hdr; const __le32 *fw_data; @@ -790,9 +820,9 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev) int i, j; /* halt the MEs */ - sdma_v4_4_2_enable(adev, false); + sdma_v4_4_2_inst_enable(adev, false, inst_mask); - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { if (!adev->sdma.instance[i].fw) return -EINVAL; @@ -818,38 +848,42 @@ static int sdma_v4_4_2_load_microcode(struct amdgpu_device *adev) } /** - * sdma_v4_4_2_start - setup and start the async dma engines + * sdma_v4_4_2_inst_start - setup and start the async dma engines * * @adev: amdgpu_device pointer + * @inst_mask: mask of dma engine instances to be enabled * * Set up the DMA engines and enable them. * Returns 0 for success, error for failure. */ -static int sdma_v4_4_2_start(struct amdgpu_device *adev) +static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, + uint32_t inst_mask) { struct amdgpu_ring *ring; + uint32_t tmp_mask; int i, r = 0; if (amdgpu_sriov_vf(adev)) { - sdma_v4_4_2_ctx_switch_enable(adev, false); - sdma_v4_4_2_enable(adev, false); + sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask); + sdma_v4_4_2_inst_enable(adev, false, inst_mask); } else { /* bypass sdma microcode loading on Gopher */ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && - !(adev->pdev->device == 0x49) && !(adev->pdev->device == 0x50)) { - r = sdma_v4_4_2_load_microcode(adev); + adev->sdma.instance[0].fw) { + r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); if (r) return r; } /* unhalt the MEs */ - sdma_v4_4_2_enable(adev, true); + sdma_v4_4_2_inst_enable(adev, true, inst_mask); /* enable sdma ring preemption */ - sdma_v4_4_2_ctx_switch_enable(adev, true); + sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask); } /* start the gfx rings and rlc compute queues */ - for (i = 0; i < adev->sdma.num_instances; i++) { + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); @@ -860,6 +894,8 @@ static int sdma_v4_4_2_start(struct amdgpu_device *adev) /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + /* enable context empty interrupt during initialization */ + temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); WREG32_SDMA(i, regSDMA_CNTL, temp); if (!amdgpu_sriov_vf(adev)) { @@ -876,15 +912,16 @@ static int sdma_v4_4_2_start(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev)) { - sdma_v4_4_2_ctx_switch_enable(adev, true); - sdma_v4_4_2_enable(adev, true); + sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask); + sdma_v4_4_2_inst_enable(adev, true, inst_mask); } else { - r = sdma_v4_4_2_rlc_resume(adev); + r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask); if (r) return r; } - for (i = 0; i < adev->sdma.num_instances; i++) { + tmp_mask = inst_mask; + for_each_inst(i, tmp_mask) { ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_helper(ring); @@ -1221,6 +1258,7 @@ static int sdma_v4_4_2_early_init(void *handle) sdma_v4_4_2_set_buffer_funcs(adev); sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); + sdma_v4_4_2_set_ras_funcs(adev); return 0; } @@ -1253,9 +1291,10 @@ static int sdma_v4_4_2_sw_init(void *handle) struct amdgpu_ring *ring; int r, i; struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; + u32 aid_id; /* SDMA trap event */ - for (i = 0; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), SDMA0_4_0__SRCID__SDMA_TRAP, &adev->sdma.trap_irq); @@ -1264,7 +1303,7 @@ static int sdma_v4_4_2_sw_init(void *handle) } /* SDMA SRAM ECC event */ - for (i = 0; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), SDMA0_4_0__SRCID__SDMA_SRAM_ECC, &adev->sdma.ecc_irq); @@ -1273,7 +1312,7 @@ static int sdma_v4_4_2_sw_init(void *handle) } /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/ - for (i = 0; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_inst_per_aid; i++) { r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), SDMA0_4_0__SRCID__SDMA_VM_HOLE, &adev->sdma.vm_hole_irq); @@ -1303,15 +1342,17 @@ static int sdma_v4_4_2_sw_init(void *handle) ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; + aid_id = adev->sdma.instance[i].aid_id; DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(aid_id); - sprintf(ring->name, "sdma%d", i); + sprintf(ring->name, "sdma%d.%d", aid_id, + i % adev->sdma.num_inst_per_aid); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -1323,14 +1364,15 @@ static int sdma_v4_4_2_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = true; - /* paging queue use same doorbell index/routing as gfx queue - * with 0x400 (4096 dwords) offset on second doorbell page + /* doorbell index of page queue is assigned right after + * gfx queue on the same instance */ - ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->doorbell_index += 0x400; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->doorbell_index = + (adev->doorbell_index.sdma_engine[i] + 1) << 1; + ring->vm_hub = AMDGPU_MMHUB0(aid_id); - sprintf(ring->name, "page%d", i); + sprintf(ring->name, "page%d.%d", aid_id, + i % adev->sdma.num_inst_per_aid); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, @@ -1340,6 +1382,11 @@ static int sdma_v4_4_2_sw_init(void *handle) } } + if (amdgpu_sdma_ras_sw_init(adev)) { + dev_err(adev->dev, "fail to initialize sdma ras block\n"); + return -EINVAL; + } + return r; } @@ -1366,14 +1413,13 @@ static int sdma_v4_4_2_hw_init(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t inst_mask; - if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); - + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); if (!amdgpu_sriov_vf(adev)) - sdma_v4_4_2_init_golden_registers(adev); + sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_start(adev); + r = sdma_v4_4_2_inst_start(adev, inst_mask); return r; } @@ -1381,26 +1427,36 @@ static int sdma_v4_4_2_hw_init(void *handle) static int sdma_v4_4_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t inst_mask; int i; if (amdgpu_sriov_vf(adev)) return 0; - for (i = 0; i < adev->sdma.num_instances; i++) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, 
- AMDGPU_SDMA_IRQ_INSTANCE0 + i); + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } } - sdma_v4_4_2_ctx_switch_enable(adev, false); - sdma_v4_4_2_enable(adev, false); + sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask); + sdma_v4_4_2_inst_enable(adev, false, inst_mask); return 0; } +static int sdma_v4_4_2_set_clockgating_state(void *handle, + enum amd_clockgating_state state); + static int sdma_v4_4_2_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev)) + sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + return sdma_v4_4_2_hw_fini(adev); } @@ -1471,13 +1527,31 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t instance; + uint32_t instance, i; DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); + + /* Client id gives the SDMA instance in AID. To know the exact SDMA + * instance, interrupt entry gives the node id which corresponds to the AID instance. + * Match node id with the AID id associated with the SDMA instance. */ + for (i = instance; i < adev->sdma.num_instances; + i += adev->sdma.num_inst_per_aid) { + if (adev->sdma.instance[i].aid_id == + node_id_to_phys_map[entry->node_id]) + break; + } + + if (i >= adev->sdma.num_instances) { + dev_WARN_ONCE( + adev->dev, 1, + "Couldn't find the right sdma instance in trap handler"); + return 0; + } + switch (entry->ring_id) { case 0: - amdgpu_fence_process(&adev->sdma.instance[instance].ring); + amdgpu_fence_process(&adev->sdma.instance[i].ring); break; default: break; @@ -1496,7 +1570,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev, * be disabled and the driver should only look for the aggregated * interrupt via sync flood */ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) goto out; instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); @@ -1535,15 +1609,22 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - u32 sdma_edc_config; + u32 sdma_cntl; - sdma_edc_config = RREG32_SDMA(type, regCC_SDMA_EDC_CONFIG); - /* - * FIXME: This was inherited from Aldebaran, but no this field - * definition in the regspec of both Aldebaran and SDMA 4.4.2 - */ - sdma_edc_config |= (state == AMDGPU_IRQ_STATE_ENABLE) ? 
(1 << 2) : 0; - WREG32_SDMA(type, regCC_SDMA_EDC_CONFIG, sdma_edc_config); + sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, + DRAM_ECC_INT_ENABLE, 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); + break; + /* sdma ecc interrupt is enabled by default + * driver doesn't need to do anything to + * enable the interrupt */ + case AMDGPU_IRQ_STATE_ENABLE: + default: + break; + } return 0; } @@ -1615,19 +1696,49 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev, return 0; } -static void sdma_v4_4_2_update_medium_grain_clock_gating( - struct amdgpu_device *adev, - bool enable) +static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( + struct amdgpu_device *adev, bool enable, uint32_t inst_mask) { uint32_t data, def; int i; - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { - for (i = 0; i < adev->sdma.num_instances; i++) { + /* leave as default if it is not driver controlled */ + if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) + return; + + if (enable) { + for_each_inst(i, inst_mask) { + /* 1-not override: enable sdma mem light sleep */ + def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); + data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32_SDMA(i, regSDMA_POWER_CNTL, data); + } + } else { + for_each_inst(i, inst_mask) { + /* 0-override:disable sdma mem light sleep */ + def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL); + data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32_SDMA(i, regSDMA_POWER_CNTL, data); + } + } +} + +static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( + struct amdgpu_device *adev, bool enable, uint32_t inst_mask) +{ + uint32_t data, def; + int i; + + /* leave as default if it is not driver controlled */ + if (!(adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) + return; + + if (enable) { + for_each_inst(i, inst_mask) { def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); - data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | + data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK | @@ -1637,11 +1748,9 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating( WREG32_SDMA(i, regSDMA_CLK_CTRL, data); } } else { - for (i = 0; i < adev->sdma.num_instances; i++) { + for_each_inst(i, inst_mask) { def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL); - data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | + data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK | SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK | @@ -1653,45 +1762,21 @@ static void sdma_v4_4_2_update_medium_grain_clock_gating( } } - -static void sdma_v4_4_2_update_medium_grain_light_sleep( - struct amdgpu_device *adev, - bool enable) -{ - uint32_t data, def; - int i; - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { - for (i = 0; i < adev->sdma.num_instances; i++) { - /* 1-not override: enable sdma mem light sleep */ - def = data = RREG32_SDMA(0, regSDMA_POWER_CNTL); - data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32_SDMA(0, regSDMA_POWER_CNTL, data); - } - } else { - for (i = 0; i < adev->sdma.num_instances; i++) { - /* 0-override:disable sdma mem light sleep */ - def = data = RREG32_SDMA(0, 
regSDMA_POWER_CNTL); - data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32_SDMA(0, regSDMA_POWER_CNTL, data); - } - } -} - static int sdma_v4_4_2_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t inst_mask; if (amdgpu_sriov_vf(adev)) return 0; - sdma_v4_4_2_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE); - sdma_v4_4_2_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE); + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); + + sdma_v4_4_2_inst_update_medium_grain_clock_gating( + adev, state == AMD_CG_STATE_GATE, inst_mask); + sdma_v4_4_2_inst_update_medium_grain_light_sleep( + adev, state == AMD_CG_STATE_GATE, inst_mask); return 0; } @@ -1710,12 +1795,12 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) *flags = 0; /* AMD_CG_SUPPORT_SDMA_MGCG */ - data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, regSDMA_CLK_CTRL)); - if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK)) + data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL)); + if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK)) *flags |= AMD_CG_SUPPORT_SDMA_MGCG; /* AMD_CG_SUPPORT_SDMA_LS */ - data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, regSDMA_POWER_CNTL)); + data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL)); if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) *flags |= AMD_CG_SUPPORT_SDMA_LS; } @@ -1740,7 +1825,7 @@ const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .get_rptr = sdma_v4_4_2_ring_get_rptr, @@ -1771,7 +1856,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .get_rptr = sdma_v4_4_2_ring_get_rptr, @@ -1802,7 +1887,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) { - int i; + int i, dev_inst; for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs; @@ -1812,6 +1897,11 @@ static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) &sdma_v4_4_2_page_ring_funcs; adev->sdma.instance[i].page.me = i; } + + dev_inst = GET_INST(SDMA0, i); + /* AID to which SDMA belongs depends on physical instance */ + adev->sdma.instance[i].aid_id = + dev_inst / adev->sdma.num_inst_per_aid; } } @@ -1965,3 +2055,146 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .rev = 0, .funcs = &sdma_v4_4_2_ip_funcs, }; + +static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + if (!amdgpu_sriov_vf(adev)) + sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); + + r = sdma_v4_4_2_inst_start(adev, inst_mask); + + return r; +} + +static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp_mask = inst_mask; + int i; + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for_each_inst(i, tmp_mask) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + 
AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } + } + + sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask); + sdma_v4_4_2_inst_enable(adev, false, inst_mask); + + return 0; +} + +struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = { + .suspend = &sdma_v4_4_2_xcp_suspend, + .resume = &sdma_v4_4_2_xcp_resume +}; + +static const struct amdgpu_ras_err_status_reg_entry sdma_v4_2_2_ue_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(SDMA0, 0, regSDMA_UE_ERR_STATUS_LO, regSDMA_UE_ERR_STATUS_HI), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SDMA"}, +}; + +static const struct amdgpu_ras_memory_id_entry sdma_v4_4_2_ras_memory_list[] = { + {AMDGPU_SDMA_MBANK_DATA_BUF0, "SDMA_MBANK_DATA_BUF0"}, + {AMDGPU_SDMA_MBANK_DATA_BUF1, "SDMA_MBANK_DATA_BUF1"}, + {AMDGPU_SDMA_MBANK_DATA_BUF2, "SDMA_MBANK_DATA_BUF2"}, + {AMDGPU_SDMA_MBANK_DATA_BUF3, "SDMA_MBANK_DATA_BUF3"}, + {AMDGPU_SDMA_MBANK_DATA_BUF4, "SDMA_MBANK_DATA_BUF4"}, + {AMDGPU_SDMA_MBANK_DATA_BUF5, "SDMA_MBANK_DATA_BUF5"}, + {AMDGPU_SDMA_MBANK_DATA_BUF6, "SDMA_MBANK_DATA_BUF6"}, + {AMDGPU_SDMA_MBANK_DATA_BUF7, "SDMA_MBANK_DATA_BUF7"}, + {AMDGPU_SDMA_MBANK_DATA_BUF8, "SDMA_MBANK_DATA_BUF8"}, + {AMDGPU_SDMA_MBANK_DATA_BUF9, "SDMA_MBANK_DATA_BUF9"}, + {AMDGPU_SDMA_MBANK_DATA_BUF10, "SDMA_MBANK_DATA_BUF10"}, + {AMDGPU_SDMA_MBANK_DATA_BUF11, "SDMA_MBANK_DATA_BUF11"}, + {AMDGPU_SDMA_MBANK_DATA_BUF12, "SDMA_MBANK_DATA_BUF12"}, + {AMDGPU_SDMA_MBANK_DATA_BUF13, "SDMA_MBANK_DATA_BUF13"}, + {AMDGPU_SDMA_MBANK_DATA_BUF14, "SDMA_MBANK_DATA_BUF14"}, + {AMDGPU_SDMA_MBANK_DATA_BUF15, "SDMA_MBANK_DATA_BUF15"}, + {AMDGPU_SDMA_UCODE_BUF, "SDMA_UCODE_BUF"}, + {AMDGPU_SDMA_RB_CMD_BUF, "SDMA_RB_CMD_BUF"}, + {AMDGPU_SDMA_IB_CMD_BUF, "SDMA_IB_CMD_BUF"}, + {AMDGPU_SDMA_UTCL1_RD_FIFO, "SDMA_UTCL1_RD_FIFO"}, + {AMDGPU_SDMA_UTCL1_RDBST_FIFO, "SDMA_UTCL1_RDBST_FIFO"}, + {AMDGPU_SDMA_UTCL1_WR_FIFO, "SDMA_UTCL1_WR_FIFO"}, + {AMDGPU_SDMA_DATA_LUT_FIFO, "SDMA_DATA_LUT_FIFO"}, + {AMDGPU_SDMA_SPLIT_DAT_BUF, "SDMA_SPLIT_DAT_BUF"}, +}; + +static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, + uint32_t sdma_inst, + void *ras_err_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst); + + /* sdma v4_4_2 doesn't support query ce counts */ + amdgpu_ras_inst_query_ras_error_count(adev, + sdma_v4_2_2_ue_reg_list, + ARRAY_SIZE(sdma_v4_2_2_ue_reg_list), + sdma_v4_4_2_ras_memory_list, + ARRAY_SIZE(sdma_v4_4_2_ras_memory_list), + sdma_dev_inst, + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &err_data->ue_count); +} + +static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, + void *ras_err_status) +{ + uint32_t inst_mask; + int i = 0; + + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for_each_inst(i, inst_mask) + sdma_v4_4_2_inst_query_ras_error_count(adev, i, ras_err_status); + } else { + dev_warn(adev->dev, "SDMA RAS is not supported\n"); + } +} + +static void sdma_v4_4_2_inst_reset_ras_error_count(struct amdgpu_device *adev, + uint32_t sdma_inst) +{ + uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst); + + amdgpu_ras_inst_reset_ras_error_count(adev, + sdma_v4_2_2_ue_reg_list, + ARRAY_SIZE(sdma_v4_2_2_ue_reg_list), + sdma_dev_inst); +} + +static void sdma_v4_4_2_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t inst_mask; + int i = 0; + + inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { + for_each_inst(i, inst_mask) 
+ sdma_v4_4_2_inst_reset_ras_error_count(adev, i); + } else { + dev_warn(adev->dev, "SDMA RAS is not supported\n"); + } +} + +static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { + .query_ras_error_count = sdma_v4_4_2_query_ras_error_count, + .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, +}; + +static struct amdgpu_sdma_ras sdma_v4_4_2_ras = { + .ras_block = { + .hw_ops = &sdma_v4_4_2_ras_hw_ops, + }, +}; + +static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->sdma.ras = &sdma_v4_4_2_ras; +} diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h index 4814e8a074d6..d516145529bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.h @@ -27,4 +27,6 @@ extern const struct amd_ip_funcs sdma_v4_4_2_ip_funcs; extern const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block; +extern struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 92e1299be021..5c4d4df9cf94 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -819,8 +819,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - ring->sched.ready = true; - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ sdma_v5_0_ctx_switch_enable(adev, true); sdma_v5_0_enable(adev, true); @@ -1389,7 +1387,7 @@ static int sdma_v5_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? 
AMDGPU_SDMA_IRQ_INSTANCE0 : diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ca7e8757d78e..a7b230e5a26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -617,18 +617,14 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); - ring->sched.ready = true; - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ sdma_v5_2_ctx_switch_enable(adev, true); sdma_v5_2_enable(adev, true); } - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->sched.ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -1253,7 +1249,7 @@ static int sdma_v5_2_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3d9a80511a45..3b03dda854fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -238,6 +238,8 @@ static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * * @ring: amdgpu ring pointer * @ib: IB object to schedule + * @flags: unused + * @job: job to retrieve vmid from * * Schedule an IB in the DMA ring. */ @@ -585,16 +587,12 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); - ring->sched.ready = true; - if (amdgpu_sriov_vf(adev)) sdma_v6_0_enable(adev, true); r = amdgpu_ring_test_helper(ring); - if (r) { - ring->sched.ready = false; + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -942,6 +940,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) * sdma_v6_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring. * Returns 0 on success, error on failure. @@ -1122,6 +1121,7 @@ static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib, /** * sdma_v6_0_ring_pad_ib - pad the IB * @ib: indirect buffer to fill with padding + * @ring: amdgpu ring pointer * * Pad the IB with NOPs to a boundary multiple of 8. */ @@ -1171,6 +1171,8 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA. 
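Earlier in this patch, sdma_v4_4_2 becomes AID-aware: each instance is translated with GET_INST(), its AID is dev_inst / num_inst_per_aid, its slot inside that AID is dev_inst % num_inst_per_aid, and the rings are named "sdma<aid>.<slot>" accordingly. The helper below replays that bookkeeping, assuming a flat numbering where the logical and device instance coincide (an assumption made only for the sketch).

#include <assert.h>
#include <stdio.h>

struct sdma_inst_loc {
	unsigned int aid_id; /* which AID the engine lives on */
	unsigned int slot;   /* engine index inside that AID */
};

static struct sdma_inst_loc sdma_locate(unsigned int dev_inst,
					unsigned int num_inst_per_aid)
{
	struct sdma_inst_loc loc = {
		.aid_id = dev_inst / num_inst_per_aid,
		.slot   = dev_inst % num_inst_per_aid,
	};
	return loc;
}

int main(void)
{
	/* e.g. 8 engines spread over 4 AIDs, 2 per AID; logical == device
	 * instance is assumed here purely for the sketch */
	unsigned int num_inst_per_aid = 2;

	for (unsigned int i = 0; i < 8; i++) {
		struct sdma_inst_loc loc = sdma_locate(i, num_inst_per_aid);

		printf("sdma%u.%u (logical %u)\n", loc.aid_id, loc.slot, i);
	}
	assert(sdma_locate(5, num_inst_per_aid).aid_id == 2);
	assert(sdma_locate(5, num_inst_per_aid).slot == 1);
	return 0;
}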
@@ -1298,7 +1300,7 @@ static int sdma_v6_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 7f99e130acd0..f64b87b11b1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1181,12 +1181,12 @@ static uint32_t si_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index abca8b529721..42c4547f32ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -174,8 +174,6 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); - ring->sched.ready = true; - r = amdgpu_ring_test_helper(ring); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c new file mode 100644 index 000000000000..4368a5891eeb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c @@ -0,0 +1,103 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v13_0_3.h" +#include "soc15_common.h" +#include "smuio/smuio_13_0_3_offset.h" +#include "smuio/smuio_13_0_3_sh_mask.h" + +#define PKG_TYPE_MASK 0x00000003L + +/** + * smuio_v13_0_3_get_die_id - query die id from FCH. 
+ * + * @adev: amdgpu device pointer + * + * Returns die id + */ +static u32 smuio_v13_0_3_get_die_id(struct amdgpu_device *adev) +{ + u32 data, die_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + die_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, DIE_ID); + + return die_id; +} + +/** + * smuio_v13_0_3_get_socket_id - query socket id from FCH + * + * @adev: amdgpu device pointer + * + * Returns socket id + */ +static u32 smuio_v13_0_3_get_socket_id(struct amdgpu_device *adev) +{ + u32 data, socket_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID); + + return socket_id; +} + +/** + * smuio_v13_0_3_get_pkg_type - query package type set by MP1/bootcode + * + * @adev: amdgpu device pointer + * + * Returns package type + */ + +static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *adev) +{ + enum amdgpu_pkg_type pkg_type; + u32 data; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, PKG_TYPE); + /* pkg_type[4:0] + * + * bit 1 == 1 APU form factor + * + * b0100 - b1111 - Reserved + */ + switch (data & PKG_TYPE_MASK) { + case 0x2: + pkg_type = AMDGPU_PKG_TYPE_APU; + break; + default: + pkg_type = AMDGPU_PKG_TYPE_UNKNOWN; + break; + } + + return pkg_type; +} + + +const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs = { + .get_die_id = smuio_v13_0_3_get_die_id, + .get_socket_id = smuio_v13_0_3_get_socket_id, + .get_pkg_type = smuio_v13_0_3_get_pkg_type, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h new file mode 100644 index 000000000000..795f66c5a58b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.h @@ -0,0 +1,30 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
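The new smuio_v13_0_3_funcs above plug into the driver's common smuio callback table, so package-topology queries stay ASIC-agnostic for callers. A hedged usage sketch (is_apu_package() is a made-up helper, not part of this patch, and assumes the usual adev->smuio.funcs wiring):

/* illustration only: hypothetical caller of the new smuio funcs */
static bool is_apu_package(struct amdgpu_device *adev)
{
	if (!adev->smuio.funcs || !adev->smuio.funcs->get_pkg_type)
		return false;

	/* decodes SMUIO_MCM_CONFIG.PKG_TYPE as in the hunk above */
	return adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU;
}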
+ * + */ +#ifndef __SMUIO_V13_0_3_H__ +#define __SMUIO_V13_0_3_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v13_0_3_funcs; + +#endif /* __SMUIO_V13_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a2fd1ffadb59..afcaeadda4c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -153,6 +153,24 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode = .codec_array = rn_video_codecs_decode_array, }; +static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_decode = { + .codec_count = ARRAY_SIZE(vcn_4_0_3_video_codecs_decode_array), + .codec_array = vcn_4_0_3_video_codecs_decode_array, +}; + +static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = { + .codec_count = 0, + .codec_array = NULL, +}; + static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -185,6 +203,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &rn_video_codecs_decode; return 0; + case IP_VERSION(4, 0, 3): + if (encode) + *codecs = &vcn_4_0_3_video_codecs_encode; + else + *codecs = &vcn_4_0_3_video_codecs_decode; + return 0; default: return -EINVAL; } @@ -312,7 +336,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) void soc15_grbm_select(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 queue, u32 vmid) + u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id) { u32 grbm_gfx_cntl = 0; grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); @@ -320,12 +344,7 @@ void soc15_grbm_select(struct amdgpu_device *adev, grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); - WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl); -} - -static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) -{ - /* todo */ + WREG32_SOC15_RLC_SHADOW(GC, xcc_id, mmGRBM_GFX_CNTL, grbm_gfx_cntl); } static bool soc15_read_disabled_bios(struct amdgpu_device *adev) @@ -364,12 +383,12 @@ static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } @@ -533,6 +552,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) if (connected_to_cpu) return AMD_RESET_METHOD_MODE2; break; + case IP_VERSION(13, 0, 6): + /* Use gpu_recovery param to target a reset method. + * Enable triggering of GPU reset only if specified + * by module parameter. 
+ */ + if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) + return AMD_RESET_METHOD_MODE2; + else + return AMD_RESET_METHOD_NONE; default: break; } @@ -817,7 +845,6 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .read_register = &soc15_read_register, .reset = &soc15_asic_reset, .reset_method = &soc15_asic_reset_method, - .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, .set_vce_clocks = &soc15_set_vce_clocks, @@ -839,7 +866,6 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .read_register = &soc15_read_register, .reset = &soc15_asic_reset, .reset_method = &soc15_asic_reset_method, - .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, .set_vce_clocks = &soc15_set_vce_clocks, @@ -854,6 +880,28 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .query_video_codecs = &soc15_query_video_codecs, }; +static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = +{ + .read_disabled_bios = &soc15_read_disabled_bios, + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, + .read_register = &soc15_read_register, + .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, + .get_xclk = &soc15_get_xclk, + .set_uvd_clocks = &soc15_set_uvd_clocks, + .set_vce_clocks = &soc15_set_vce_clocks, + .get_config_memsize = &soc15_get_config_memsize, + .need_full_reset = &soc15_need_full_reset, + .init_doorbell_index = &aqua_vanjaram_doorbell_index_init, + .get_pcie_usage = &vega20_get_pcie_usage, + .need_reset_on_init = &soc15_need_reset_on_init, + .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .supports_baco = &soc15_supports_baco, + .pre_asic_init = &soc15_pre_asic_init, + .query_video_codecs = &soc15_query_video_codecs, + .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, +}; + static int soc15_common_early_init(void *handle) { #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) @@ -867,6 +915,8 @@ static int soc15_common_early_init(void *handle) adev->smc_wreg = NULL; adev->pcie_rreg = &amdgpu_device_indirect_rreg; adev->pcie_wreg = &amdgpu_device_indirect_wreg; + adev->pcie_rreg_ext = &amdgpu_device_indirect_rreg_ext; + adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext; adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; @@ -1095,9 +1145,18 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x3c; break; case IP_VERSION(9, 4, 3): - adev->asic_funcs = &vega20_asic_funcs; - adev->cg_flags = 0; - adev->pg_flags = 0; + adev->asic_funcs = &aqua_vanjaram_asic_funcs; + adev->cg_flags = + AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x46; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index efc2a253e8db..eac54042c6c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -100,7 +100,7 @@ struct soc15_ras_field_entry { #define SOC15_RAS_REG_FIELD_VAL(val, entry, field) SOC15_REG_FIELD_VAL((val), (entry).field##_count_mask, 
(entry).field##_count_shift) void soc15_grbm_select(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 queue, u32 vmid); + u32 me, u32 pipe, u32 queue, u32 vmid, int xcc_id); void soc15_set_virt_ops(struct amdgpu_device *adev); void soc15_program_register_sequence(struct amdgpu_device *adev, @@ -111,7 +111,11 @@ int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); int arct_reg_base_init(struct amdgpu_device *adev); int aldebaran_reg_base_init(struct amdgpu_device *adev); +void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); +u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); +int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); +void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 9fefd403e14f..96948a59f8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -24,8 +24,18 @@ #ifndef __SOC15_COMMON_H__ #define __SOC15_COMMON_H__ +/* GET_INST returns the physical instance corresponding to a logical instance */ +#define GET_INST(ip, inst) \ + (adev->ip_map.logical_to_dev_inst ? \ + adev->ip_map.logical_to_dev_inst(adev, ip##_HWIP, inst) : inst) +#define GET_MASK(ip, mask) \ + (adev->ip_map.logical_to_dev_mask ? \ + adev->ip_map.logical_to_dev_mask(adev, ip##_HWIP, mask) : mask) + /* Register Access Macros */ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) +#define SOC15_REG_OFFSET1(ip, inst, reg, offset) \ + (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset)) #define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? 
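GET_INST()/GET_MASK() above translate a driver-logical instance into the physical one whenever the ASIC registers an ip_map; on parts without a map they fall through to the value unchanged. Usage then follows the pattern the VCN 4.0.3 code later in this diff relies on (short sketch, register value illustrative):

/* the driver loops over logical instance i ... */
int vcn_inst = GET_INST(VCN, i);

/* ... but register apertures are addressed by the physical instance */
WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__UVD_BUSY);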
\ @@ -66,7 +76,8 @@ AMDGPU_REGS_NO_KIQ, ip##_HWIP) #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ - __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, 0, ip##_HWIP) + __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \ + (offset), 0, ip##_HWIP) #define WREG32_SOC15(ip, inst, reg, value) \ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \ @@ -86,31 +97,15 @@ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \ value, 0, ip##_HWIP) -#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \ -({ int ret = 0; \ - do { \ - uint32_t old_ = 0; \ - uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ - uint32_t loop = adev->usec_timeout; \ - ret = 0; \ - while ((tmp_ & (mask)) != (expected_value)) { \ - if (old_ != tmp_) { \ - loop = adev->usec_timeout; \ - old_ = tmp_; \ - } else \ - udelay(1); \ - tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ - loop--; \ - if (!loop) { \ - DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \ - inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \ - ret = -ETIMEDOUT; \ - break; \ - } \ - } \ - } while (0); \ - ret; \ -}) +#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \ + amdgpu_device_wait_on_rreg(adev, inst, \ + (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)), \ + #reg, expected_value, mask) + +#define SOC15_WAIT_ON_RREG_OFFSET(ip, inst, reg, offset, expected_value, mask) \ + amdgpu_device_wait_on_rreg(adev, inst, \ + (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg) + (offset)), \ + #reg, expected_value, mask) #define WREG32_RLC(reg, value) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP) @@ -157,10 +152,10 @@ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ if (amdgpu_sriov_fullaccess(adev)) { \ - uint32_t r2 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \ - uint32_t r3 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \ - uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \ - uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \ + uint32_t r2 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG2; \ + uint32_t r3 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG3; \ + uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_CNTL_BASE_IDX] + prefix##GRBM_GFX_CNTL; \ + uint32_t grbm_idx = adev->reg_offset[GC_HWIP][inst][prefix##GRBM_GFX_INDEX_BASE_IDX] + prefix##GRBM_GFX_INDEX; \ if (target_reg == grbm_cntl) \ WREG32(r2, value); \ else if (target_reg == grbm_idx) \ @@ -176,13 +171,13 @@ #define WREG32_SOC15_RLC(ip, inst, reg, value) \ do { \ - uint32_t target_reg = adev->reg_offset[ip##_HWIP][0][reg##_BASE_IDX] + reg;\ + uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \ } while (0) #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \ do { \ - uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\ + uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\ WREG32_RLC_EX(prefix, target_reg, value); \ } while (0) @@ -199,4 
+194,14 @@ #define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP) +/* inst equals to ext for some IPs */ +#define RREG32_SOC15_EXT(ip, inst, reg, ext) \ + RREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \ + + adev->asic_funcs->encode_ext_smn_addressing(ext)) \ + +#define WREG32_SOC15_EXT(ip, inst, reg, ext, value) \ + WREG32_PCIE_EXT((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) * 4 \ + + adev->asic_funcs->encode_ext_smn_addressing(ext), \ + value) \ + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d77162536514..e5e5d68a4d70 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -248,11 +248,6 @@ void soc21_grbm_select(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, grbm_gfx_cntl); } -static void soc21_vga_set_state(struct amdgpu_device *adev, bool state) -{ - /* todo */ -} - static bool soc21_read_disabled_bios(struct amdgpu_device *adev) { /* todo */ @@ -288,12 +283,12 @@ static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_n mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } @@ -542,9 +537,9 @@ static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, bool enter) { if (enter) - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); else - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); @@ -559,7 +554,6 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .read_register = &soc21_read_register, .reset = &soc21_asic_reset, .reset_method = &soc21_asic_reset_method, - .set_vga_state = &soc21_vga_set_state, .get_xclk = &soc21_get_xclk, .set_uvd_clocks = &soc21_set_uvd_clocks, .set_vce_clocks = &soc21_set_vce_clocks, diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 30d0482ac466..879bb7af297c 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -129,6 +129,8 @@ struct ta_ras_trigger_error_input { struct ta_ras_init_flags { uint8_t poison_mode_en; uint8_t dgpu_mode; + uint16_t xcc_mask; + uint8_t channel_dis_num; }; struct ta_ras_output_flags { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index d51ae0bc36f7..46bfdee79bfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -444,6 +444,11 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade umc_v8_10_ecc_info_query_error_address, ras_error_status); } +static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr) +{ + hdr->version = RAS_TABLE_VER_V2_1; +} + const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = 
umc_v8_10_query_ras_error_address, @@ -457,4 +462,5 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address, + .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h index c6dfd433fec7..dc12e0af5451 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -33,7 +33,8 @@ /* Total channel instances for all available umc nodes */ #define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ - (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->gmc.num_umc) + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * \ + (adev)->gmc.num_umc - hweight32((adev)->gmc.m_half_use) * 2) /* UMC regiser per channel offset */ #define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e32b656b3dab..abaa4463e906 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -444,7 +444,7 @@ static int uvd_v7_0_sw_init(void *handle) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0, @@ -455,7 +455,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 57b85bb6a1e4..e0b70cd3b697 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,7 +466,7 @@ static int vce_v4_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); ring = &adev->vce.ring[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vce%d", i); if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 761c28fa6ec1..16feb491adf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -120,7 +120,7 @@ static int vcn_v1_0_sw_init(void *handle) return r; ring = &adev->vcn.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -142,7 +142,7 @@ static int vcn_v1_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, hw_prio, NULL); @@ -211,7 +211,7 @@ static int vcn_v1_0_hw_init(void *handle) goto done; } - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -1304,7 +1304,7 @@ static int 
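The UMC_V8_10_TOTAL_CHANNEL_NUM() change above now deducts two channels for every bit set in gmc.m_half_use, on top of the usual channels-per-instance product. A worked example with made-up counts (the real *_INSTANCE_NUM constants are defined earlier in this header and may differ):

/* hypothetical numbers, for illustration only:
 *   UMC_V8_10_CHANNEL_INSTANCE_NUM = 2, UMC_V8_10_UMC_INSTANCE_NUM = 4,
 *   adev->gmc.num_umc = 2, adev->gmc.m_half_use = 0x5 (two bits set)
 *
 *   total = 2 * 4 * 2 - hweight32(0x5) * 2 = 16 - 4 = 12 channels
 */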
vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); /* Restore */ - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1802,7 +1802,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -1810,7 +1810,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) adev->vcn.pause_dpg_mode(adev, 0, &new_state); } - fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); + fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec); fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec); if (fences == 0) { @@ -1832,7 +1832,7 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); - if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec)) DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n"); vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); @@ -1864,7 +1864,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 7c2b3aa48083..c975aed2f6c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -129,7 +129,7 @@ static int vcn_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -160,7 +160,7 @@ static int vcn_v2_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); if (!amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; else diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 515681c88dcb..bb1875f926f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -188,9 +188,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? 2*j : 8*j); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, @@ -208,9 +208,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? 
(1 + i + 2*j) : (2 + i + 8*j)); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3eab186261aa..c8f63b3c6f69 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -189,7 +189,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, @@ -213,7 +213,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); @@ -1313,7 +1313,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2; - for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + for (i = 0; i < MMSCH_V3_0_VCN_INSTANCES; i++) { header.inst[i].init_status = 0; header.inst[i].table_offset = 0; header.inst[i].table_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index da126ff8bcbc..b48bb5212488 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -153,7 +153,7 @@ static int vcn_v4_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, @@ -1243,7 +1243,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2; - for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) { + for (i = 0; i < MMSCH_V4_0_VCN_INSTANCES; i++) { header.inst[i].init_status = 0; header.inst[i].table_offset = 0; header.inst[i].table_size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c new file mode 100644 index 000000000000..5d67b8b8a3d6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -0,0 +1,1541 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
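Across the UVD, VCE, VCN and SDMA hunks above, the flat AMDGPU_GFXHUB_0 / AMDGPU_MMHUB_0 / AMDGPU_MMHUB_1 constants give way to the instanced AMDGPU_GFXHUB(x) / AMDGPU_MMHUB0(x) / AMDGPU_MMHUB1(x) forms, so a ring can name which hub instance it sits behind on multi-AID parts. Existing single-hub IPs simply pass instance 0, while the new VCN 4.0.3 code below selects the hub of its AID; both lines are taken from this diff:

/* single-hub ASICs: same hub as before, instance 0 */
ring->vm_hub = AMDGPU_MMHUB0(0);

/* VCN 4.0.3: the MMHUB of whichever AID the VCN instance lives on */
ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);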
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_4_0_3_offset.h" +#include "vcn/vcn_4_0_3_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL +#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX +#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA +#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX + +#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 +#define VCN1_VID_SOC_ADDRESS_3_0 0x48300 + +static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, + int inst_idx, bool indirect); +/** + * vcn_v4_0_3_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v4_0_3_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v4_0_3_set_unified_ring_funcs(adev); + vcn_v4_0_3_set_irq_funcs(adev); + vcn_v4_0_3_set_ras_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v4_0_3_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v4_0_3_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + vcn_inst = GET_INST(VCN, i); + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, + &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + 
fw_shared->sq.is_enabled = true; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { + r = amdgpu_vcn_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize vcn ras block!\n"); + return r; + } + } + + return 0; +} + +/** + * vcn_v4_0_3_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v4_0_3_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r, idx; + + if (drm_dev_enter(&adev->ddev, &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = cpu_to_le32(false); + } + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v4_0_3_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v4_0_3_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL); + } + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + +done: + if (!r) + DRM_DEV_INFO(adev->dev, "VCN decode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v4_0_3_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v4_0_3_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (adev->vcn.cur_state != AMD_PG_STATE_GATE) + vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * vcn_v4_0_3_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v4_0_3_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = vcn_v4_0_3_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v4_0_3_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v4_0_3_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v4_0_3_hw_init(adev); + + return r; +} + +/** + * vcn_v4_0_3_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst_idx); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx] + .tmr_mc_addr_lo)); + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx] + .tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, + AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + 
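/* Illustrative summary (not part of the patch) of the address map that
 * vcn_v4_0_3_mc_resume() programs, for the direct firmware load case;
 * with PSP load, cache window 0 points at the TMR and 'offset' stays 0:
 *
 *   inst->gpu_addr + 0                      cache window 0: VCN firmware (size)
 *   inst->gpu_addr + size                   cache window 1: stack (AMDGPU_VCN_STACK_SIZE)
 *   inst->gpu_addr + size + AMDGPU_VCN_STACK_SIZE
 *                                           cache window 2: context (AMDGPU_VCN_CONTEXT_SIZE)
 *   inst->fw_shared.gpu_addr                non-cache window 0: amdgpu_vcn4_fw_shared
 */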
WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, + AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr)); + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15( + VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + 
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t data; + int vcn_inst; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* VCN disable CGC */ + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF); + + data = 
RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode + * + * @adev: amdgpu_device pointer + * @sram_sel: sram select + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Disable clock gating for VCN block with dpg mode + */ +static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, + int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + /* enable sw clock gating control */ + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); 
+} + +/** + * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t data; + int vcn_inst; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* enable VCN CGC */ + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + int vcn_inst; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d", + inst_idx, adev->vcn.inst[inst_idx].aid_id); + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable clock gating */ + vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__BLK_RST_MASK; + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 
UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + vcn_v4_0_3_enable_ras(adev, inst_idx, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, 0, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + /* program the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, + upper_32_bits(ring->gpu_addr)); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, + ring->ring_size / sizeof(uint32_t)); + + /* resetting ring, fw should not check RB ring */ + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + /*resetting done, fw can check RB ring */ + fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + return 0; +} + +/** + * vcn_v4_0_3_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v4_0_3_start(struct amdgpu_device *adev) +{ + 
volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + int i, j, k, r, vcn_inst; + uint32_t tmp; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + vcn_inst = GET_INST(VCN, i); + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | + UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /*SW clock gating */ + vcn_v4_0_3_disable_clock_gating(adev, i); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, + ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, + tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup regUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup UVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v4_0_3_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, + regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_DEV_ERROR(adev->dev, + "VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + 
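The WREG32_P() calls in the start sequence above follow a masked read-modify-write convention: the third argument is the set of bits to preserve (hence the ~MASK form), and the second argument supplies the new bits. A minimal sketch of that semantic, using a hypothetical reg_update_bits() helper over a plain variable rather than MMIO, might look like this; the bit positions are made up for illustration.

/* Illustrative only: models the WREG32_P(reg, val, ~mask) read-modify-write
 * convention on a plain variable instead of an MMIO register. */
#include <stdint.h>
#include <stdio.h>

#define VCPU_CLK_EN_MASK  (1u << 9)   /* hypothetical bit positions */
#define VCPU_BLK_RST_MASK (1u << 0)

/* new value = (old & keep_mask) | val; keep_mask is the complement of the
 * field being updated, exactly like the ~MASK argument above. */
static void reg_update_bits(uint32_t *reg, uint32_t val, uint32_t keep_mask)
{
	*reg = (*reg & keep_mask) | val;
}

int main(void)
{
	uint32_t vcpu_cntl = 0xFF0000 | VCPU_BLK_RST_MASK;  /* some prior state */

	/* enable the VCPU clock without touching any other bit */
	reg_update_bits(&vcpu_cntl, VCPU_CLK_EN_MASK, ~VCPU_CLK_EN_MASK);

	/* release the block reset, again preserving everything else */
	reg_update_bits(&vcpu_cntl, 0, ~VCPU_BLK_RST_MASK);

	printf("UVD_VCPU_CNTL-like value: 0x%08x\n", vcpu_cntl);
	return 0;
}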
mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + /* program the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, + upper_32_bits(ring->gpu_addr)); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, + ring->ring_size / sizeof(uint32_t)); + + /* resetting ring, fw should not check RB ring */ + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + fw_shared->sq.queue_mode &= + cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF)); + + } + return 0; +} + +/** + * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t tmp; + int vcn_inst; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + return 0; +} + +/** + * vcn_v4_0_3_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v4_0_3_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + int i, r = 0, vcn_inst; + uint32_t tmp; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v4_0_3_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, + UVD_STATUS__IDLE, 0x7); + if (r) + goto Done; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, + tmp); + if (r) + goto Done; + + /* stall UMC channel */ + tmp = 
RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, + tmp); + if (r) + goto Done; + + /* Unblock VCPU Register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* reset LMI UMC/LMI/VCPU */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear VCN status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + + /* apply HW clock gating */ + vcn_v4_0_3_enable_clock_gating(adev, i); + } +Done: + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, + struct dpg_pause_state *new_state) +{ + + return 0; +} + +/** + * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); +} + +/** + * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), + regUVD_RB_WPTR); +} + +/** + * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + 
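SOC15_WAIT_ON_RREG() in the stop path above is a bounded poll: it re-reads the register until the masked value matches the expected one or a retry budget runs out, and returns non-zero on timeout. Below is a standalone sketch of that pattern under stated assumptions (a fake register that becomes ready after a few reads, hypothetical names); it is not the macro's actual implementation.

/* Illustrative only: a bounded poll loop in the spirit of SOC15_WAIT_ON_RREG,
 * reading a fake "register" until (value & mask) == expected or we time out. */
#include <stdint.h>
#include <stdio.h>

#define IDLE_MASK 0x7u
#define IDLE_VAL  0x2u

static int fake_reads;

static uint32_t fake_rreg(void)
{
	/* pretend the block reports busy for the first few reads */
	return (++fake_reads < 4) ? 0x3u : IDLE_VAL;
}

static int wait_on_reg(uint32_t expected, uint32_t mask, int retries)
{
	while (retries--) {
		uint32_t v = fake_rreg();
		if ((v & mask) == expected)
			return 0;       /* condition met */
		/* a real driver would udelay()/msleep() between reads */
	}
	return -1;                      /* timeout, -ETIMEDOUT in kernel terms */
}

int main(void)
{
	int r = wait_on_reg(IDLE_VAL, IDLE_MASK, 10);
	printf("wait result: %d after %d reads\n", r, fake_reads);
	return 0;
}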
.nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v4_0_3_unified_ring_get_rptr, + .get_wptr = vcn_v4_0_3_unified_ring_get_wptr, + .set_wptr = vcn_v4_0_3_unified_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = + vcn_inst / adev->vcn.num_inst_per_aid; + } + DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n"); +} + +/** + * vcn_v4_0_3_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v4_0_3_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == + UVD_STATUS__IDLE); + } + + return ret; +} + +/** + * vcn_v4_0_3_wait_for_idle - wait for VCN block idle + * + * @handle: amdgpu_device pointer + * + * Wait for VCN block idle + */ +static int vcn_v4_0_3_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, + UVD_STATUS__IDLE, UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state + * + * @handle: amdgpu_device pointer + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v4_0_3_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (enable) { + if (RREG32_SOC15(VCN, GET_INST(VCN, i), + regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v4_0_3_enable_clock_gating(adev, i); + } else { + vcn_v4_0_3_disable_clock_gating(adev, i); + } + } + return 0; +} + +/** + * vcn_v4_0_3_set_powergating_state - set VCN block powergating state + * + * @handle: amdgpu_device pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v4_0_3_stop(adev); + else + ret = vcn_v4_0_3_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block interrupt state + */ +static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +/** + * vcn_v4_0_3_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = { + .set = vcn_v4_0_3_set_interrupt_state, + .process = vcn_v4_0_3_process_interrupt, +}; + +/** + * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst->irq.num_types++; + } + adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; +} + +static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { + .name = "vcn_v4_0_3", + .early_init = vcn_v4_0_3_early_init, + .late_init = NULL, + .sw_init = vcn_v4_0_3_sw_init, + .sw_fini = vcn_v4_0_3_sw_fini, + .hw_init = vcn_v4_0_3_hw_init, + .hw_fini = vcn_v4_0_3_hw_fini, + .suspend = vcn_v4_0_3_suspend, + .resume = vcn_v4_0_3_resume, + .is_idle = vcn_v4_0_3_is_idle, + .wait_for_idle = vcn_v4_0_3_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, + .set_powergating_state = vcn_v4_0_3_set_powergating_state, +}; + +const struct 
amdgpu_ip_block_version vcn_v4_0_3_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 4, + .minor = 0, + .rev = 3, + .funcs = &vcn_v4_0_3_ip_funcs, +}; + +static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"}, + {AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"}, +}; + +static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev, + uint32_t vcn_inst, + void *ras_err_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + + /* vcn v4_0_3 only support query uncorrectable errors */ + amdgpu_ras_inst_query_ras_error_count(adev, + vcn_v4_0_3_ue_reg_list, + ARRAY_SIZE(vcn_v4_0_3_ue_reg_list), + NULL, 0, GET_INST(VCN, vcn_inst), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &err_data->ue_count); +} + +static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev, + void *ras_err_status) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { + dev_warn(adev->dev, "VCN RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status); +} + +static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev, + uint32_t vcn_inst) +{ + amdgpu_ras_inst_reset_ras_error_count(adev, + vcn_v4_0_3_ue_reg_list, + ARRAY_SIZE(vcn_v4_0_3_ue_reg_list), + GET_INST(VCN, vcn_inst)); +} + +static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { + dev_warn(adev->dev, "VCN RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + vcn_v4_0_3_inst_reset_ras_error_count(adev, i); +} + +static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { + .query_ras_error_count = vcn_v4_0_3_query_ras_error_count, + .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count, +}; + +static struct amdgpu_vcn_ras vcn_v4_0_3_ras = { + .ras_block = { + .hw_ops = &vcn_v4_0_3_ras_hw_ops, + }, +}; + +static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ras = &vcn_v4_0_3_ras; +} + +static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, + int inst_idx, bool indirect) +{ + uint32_t tmp; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + return; + + tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | + VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), + tmp, 0, indirect); + + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), + tmp, 0, indirect); +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h new file mode 100644 index 000000000000..0b046114373a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V4_0_3_H__ +#define __VCN_V4_0_3_H__ + +extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block; + +#endif /* __VCN_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 536128447b71..4d719df376a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -334,7 +334,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index)); /* Enable IH Retry CAM */ - if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0) || + adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); else @@ -526,6 +527,7 @@ static int vega20_ih_early_init(void *handle) static int vega20_ih_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr = true; int r; r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0, @@ -533,14 +535,18 @@ static int vega20_ih_sw_init(void *handle) if (r) return r; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); + if ((adev->flags & AMD_IS_APU) && + (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) + use_bus_addr = false; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, use_bus_addr); if (r) return r; @@ -559,7 +565,7 @@ static int vega20_ih_sw_init(void *handle) /* initialize ih control registers offset */ vega20_ih_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index c0360dbebd4b..6a8494f98d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -587,11 +587,6 @@ void vi_srbm_select(struct amdgpu_device *adev, WREG32(mmSRBM_GFX_CNTL, srbm_gfx_cntl); } -static void vi_vga_set_state(struct amdgpu_device *adev, bool state) -{ - /* todo */ -} - static bool vi_read_disabled_bios(struct 
amdgpu_device *adev) { u32 bus_cntl; @@ -769,12 +764,12 @@ static uint32_t vi_get_register_value(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0); val = RREG32(reg_offset); if (se_num != 0xffffffff || sh_num != 0xffffffff) - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); return val; } else { @@ -1442,7 +1437,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .read_register = &vi_read_register, .reset = &vi_asic_reset, .reset_method = &vi_asic_reset_method, - .set_vga_state = &vi_vga_set_state, .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, |
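The vi_get_register_value() hunk above shows the usual GRBM index discipline: take the index mutex, narrow the SE/SH selection, read the register, then restore broadcast (0xffffffff) before dropping the lock; the extra trailing 0 is the new instance argument added to amdgpu_gfx_select_se_sh(). A minimal pthread-based sketch of that select/read/restore pattern, with hypothetical names and a dummy read, could look like this:

/* Illustrative only: models the "select, read, restore broadcast" pattern
 * that vi_get_register_value() performs under grbm_idx_mutex. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define BROADCAST 0xffffffffu

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t cur_se = BROADCAST, cur_sh = BROADCAST;

static void select_se_sh(uint32_t se, uint32_t sh, uint32_t xcc_id)
{
	(void)xcc_id;          /* new instance argument, unused in this sketch */
	cur_se = se;
	cur_sh = sh;
}

static uint32_t read_indexed_reg(uint32_t se, uint32_t sh)
{
	uint32_t val;

	pthread_mutex_lock(&idx_lock);
	if (se != BROADCAST || sh != BROADCAST)
		select_se_sh(se, sh, 0);                /* narrow the window */
	val = cur_se ^ cur_sh;                          /* stand-in for RREG32() */
	if (se != BROADCAST || sh != BROADCAST)
		select_se_sh(BROADCAST, BROADCAST, 0);  /* always restore broadcast */
	pthread_mutex_unlock(&idx_lock);
	return val;
}

int main(void)
{
	printf("indexed read: 0x%08x\n", read_indexed_reg(0, 1));
	return 0;
}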