summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2019-05-31 09:33:29 +1000
committerDave Airlie <airlied@redhat.com>2019-05-31 10:04:39 +1000
commit91c1ead6aee22d4595f50ba66070b94a4a8f84a9 (patch)
tree066ffa1c352b6257dd37cda6a1df53159e133f2e
parent14ee642c2ab0a3d8a1ded11fade692d8b77172b9 (diff)
parentcf401e2856b27b2deeada498eab864e2a50cf219 (diff)
Merge branch 'drm-next-5.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.3: - Add new thermal sensors for vega asics - Various RAS fixes - Add sysfs interface for memory interface utilization - Use HMM rather than mmu notifier for user pages - Expose xgmi topology via kfd - SR-IOV fixes - Fixes for manual driver reload - Add unique identifier for vega asics - Clean up user fence handling with UVD/VCE/VCN blocks - Convert DC to use core bpc attribute rather than a custom one - Add GWS support for KFD - Vega powerplay improvements - Add CRC support for DCE 12 - SR-IOV support for new security policy - Various cleanups From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190529220944.14464-1-alexander.deucher@amd.com
-rw-r--r--Documentation/gpu/amdgpu.rst20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c83
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c225
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c (renamed from drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c)131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c254
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c232
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c289
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c130
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c428
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c424
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c135
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h483
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c83
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c49
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c375
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c70
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c53
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c85
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c55
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c71
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h3
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c94
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c95
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c29
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c37
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c130
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_stream.c51
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_helper.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_hw_types.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_link.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_abm.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h23
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c44
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c97
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c40
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c96
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c78
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c430
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h16
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c55
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c152
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h20
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c91
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c107
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dm_pp_smu.h2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h5
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/core_types.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h7
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h25
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h5
-rw-r--r--drivers/gpu/drm/amd/display/include/bios_parser_types.h3
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_asic_id.h7
-rw-r--r--drivers/gpu/drm/amd/display/include/dal_types.h2
-rw-r--r--drivers/gpu/drm/amd/display/include/set_mode_types.h5
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.c56
-rw-r--r--drivers/gpu/drm/amd/display/modules/color/color_gamma.h1
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c4
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h18
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h31
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h3
-rw-r--r--drivers/gpu/drm/amd/include/cik_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h1
-rw-r--r--drivers/gpu/drm/amd/include/kgd_pp_interface.h11
-rw-r--r--drivers/gpu/drm/amd/include/v9_structs.h3
-rw-r--r--drivers/gpu/drm/amd/include/vi_structs.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c76
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c18
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c156
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c24
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c123
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c84
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/power_state.h7
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h3
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smumgr.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/smu_v11_0.c191
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c21
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c153
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.h44
-rw-r--r--include/uapi/linux/kfd_ioctl.h35
202 files changed, 6414 insertions, 2476 deletions
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index a740e491dfcc..86138798128f 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -70,6 +70,26 @@ Interrupt Handling
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
:internal:
+AMDGPU XGMI Support
+===================
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+ :doc: AMDGPU XGMI Support
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+ :internal:
+
+AMDGPU RAS debugfs control interface
+====================================
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+ :doc: AMDGPU RAS debugfs control interface
+
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+ :internal:
+
+
GPU Power/Thermal Controls and Monitoring
=========================================
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 844f0a162981..a04f2fc7bf37 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ depends on ARCH_HAS_HMM
+ select HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fdd0ca4b0f0b..57ce44cc3226 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
@@ -173,7 +173,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 14398f55f602..58f8f132904d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
-extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@@ -211,6 +210,7 @@ struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
struct amdgpu_atif;
+struct kfd_vm_fault_info;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -415,6 +415,7 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
uint64_t *count1);
/* do we need to reset the asic at init time (e.g., kexec) */
bool (*need_reset_on_init)(struct amdgpu_device *adev);
+ /* PCIe replay counter */
+ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
};
/*
@@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma1;
};
+struct amdgpu_mmio_remap {
+ u32 reg_offset;
+ resource_size_t bus_addr;
+};
+
struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*detect_hw_virt)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
};
struct amdgpu_df_funcs {
@@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
u32 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int is_enable);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int is_disable);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ uint64_t *count);
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
@@ -764,6 +779,7 @@ struct amdgpu_device {
void __iomem *rmmio;
/* protects concurrent MM_INDEX/DATA based register access */
spinlock_t mmio_idx_lock;
+ struct amdgpu_mmio_remap rmmio_remap;
/* protects concurrent SMC based register access */
spinlock_t smc_idx_lock;
amdgpu_rreg_t smc_rreg;
@@ -936,6 +952,13 @@ struct amdgpu_device {
struct work_struct xgmi_reset_work;
bool in_baco_reset;
+
+ long gfx_timeout;
+ long sdma_timeout;
+ long video_timeout;
+ long compute_timeout;
+
+ uint64_t unique_id;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
+#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
void amdgpu_register_atpx_handler(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index aeead072fa79..4af3989e4a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -25,8 +25,10 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
};
/* this is going to have a few of the MSBs set that we need to
- * clear */
+ * clear
+ */
bitmap_complement(gpu_resources.queue_bitmap,
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
@@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
- * nbits is not compile time constant */
+ * nbits is not compile time constant
+ */
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
@@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+ void **mem_obj)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = 1;
+ bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+ bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+
+ r = amdgpu_bo_create(adev, &bp, &bo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate gws BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ *mem_obj = bo;
+ return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+{
+ struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+ amdgpu_bo_unref(&bo);
+}
+
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type)
{
@@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
return adev->gmc.xgmi.hive_id;
}
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+ struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+ struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+ int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+ if (ret < 0) {
+ DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+ adev->gmc.xgmi.physical_node_id,
+ peer_adev->gmc.xgmi.physical_node_id, ret);
+ ret = 0;
+ }
+ return (uint8_t)ret;
+}
+
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->rmmio_remap.bus_addr;
+}
+
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->gds.gws_size;
+}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4e37fa7e85b1..f968bf147c5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
atomic_t invalid;
struct amdkfd_process_info *process_info;
- struct page **user_pages;
struct amdgpu_sync sync;
@@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index fa09e11a600c..c6abcf72e822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
- pr_debug("kfd: sdma base address: 0x%x\n", retval);
+ pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index fec3a6aa1de6..4e8b4e949926 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
- pr_debug("kfd: sdma base address: 0x%x\n", retval);
+ pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ef3d93b995b2..d5af41143d12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
lock_srbm(kgd, 0, 0, 0, vmid);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
@@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
@@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
- WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+ WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type)
{
signed long r;
uint32_t seq;
@@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
@@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ uint32_t flush_type = 0;
if (adev->in_gpu_reset)
return -EIO;
+ if (adev->gmc.xgmi.num_physical_nodes &&
+ adev->asic_type == CHIP_VEGA20)
+ flush_type = 2;
if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid);
+ return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
- write_vmid_invalidate_request(kgd, vmid);
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+ flush_type);
break;
}
}
@@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
return 0;
}
- write_vmid_invalidate_request(kgd, vmid);
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to a MMHUB power gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
return 0;
}
@@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
mutex_lock(&adev->grbm_idx_mutex);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a6e5184d436c..87177ed37dd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -30,6 +30,7 @@
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
/* Special VM and GART address alignment needed for VI pre-Fiji due to
* a HW bug.
@@ -456,6 +457,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
mutex_unlock(&process_info->lock);
}
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ struct ttm_validate_buffer *bo_list_entry;
+
+ bo_list_entry = &mem->validate_list;
+ mutex_lock(&process_info->lock);
+ list_del(&bo_list_entry->head);
+ mutex_unlock(&process_info->lock);
+}
+
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
@@ -491,28 +503,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
- /* If no restore worker is running concurrently, user_pages
- * should not be allocated
- */
- WARN(mem->user_pages, "Leaking user_pages array");
-
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n", __func__);
- ret = -ENOMEM;
- goto unregister_out;
- }
-
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
- goto free_out;
+ goto unregister_out;
}
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -525,11 +521,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
- if (ret)
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -588,7 +580,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -652,7 +643,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@@ -896,6 +886,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
+ ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
&vm->process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -909,6 +902,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return 0;
+reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1109,7 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
- } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+ } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
+ ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
@@ -1199,12 +1194,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
- if (ret) {
- mutex_lock(&avm->process_info->lock);
- list_del(&(*mem)->validate_list.head);
- mutex_unlock(&avm->process_info->lock);
+ if (ret)
goto allocate_init_user_pages_failed;
- }
}
if (offset)
@@ -1213,6 +1204,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@@ -1262,15 +1254,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
- /* Free user pages if necessary */
- if (mem->user_pages) {
- pr_debug("%s: Freeing user_pages array\n", __func__);
- if (mem->user_pages[0])
- release_pages(mem->user_pages,
- mem->bo->tbo.ttm->num_pages);
- kvfree(mem->user_pages);
- }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1294,8 +1277,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
- /* If the SG is not NULL, it's one we created for a doorbell
- * BO. We need to free it.
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
+ * remap BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
@@ -1409,7 +1392,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
- pr_err("Failed to map radeon bo to gpuvm\n");
+ pr_err("Failed to map bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
@@ -1744,25 +1727,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- if (!mem->user_pages) {
- mem->user_pages =
- kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n",
- __func__);
- return -ENOMEM;
- }
- } else if (mem->user_pages[0]) {
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
- }
-
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ bo->tbo.ttm->pages);
if (ret) {
- mem->user_pages[0] = NULL;
+ bo->tbo.ttm->pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
@@ -1771,17 +1740,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* stalled user mode queues.
*/
}
-
- /* Mark the BO as valid unless it was invalidated
- * again concurrently
- */
- if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
- return -EAGAIN;
}
return 0;
}
+/* Remove invalid userptr BOs from hmm track list
+ *
+ * Stop HMM track the userptr update
+ */
+static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ bo = mem->bo;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ }
+}
+
/* Validate invalid userptr BOs
*
* Validates BOs on the userptr_inval_list, and moves them back to the
@@ -1806,7 +1786,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@@ -1830,7 +1811,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
- goto out;
+ goto out_free;
amdgpu_sync_create(&sync);
@@ -1846,10 +1827,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
- /* Copy pages array and validate the BO if we got user pages */
- if (mem->user_pages[0]) {
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@@ -1858,16 +1837,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- /* Validate succeeded, now the BO owns the pages, free
- * our copy of the pointer array. Put this BO back on
- * the userptr_valid_list. If we need to revalidate
- * it, we need to start from scratch.
- */
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
+ /* Stop HMM track the userptr update. We dont check the return
+ * value for concurrent CPU page table update because we will
+ * reschedule the restore worker if process_info->evicted_bos
+ * is updated.
+ */
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@@ -1897,8 +1876,9 @@ unreserve_out:
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out:
+out_free:
kfree(pd_bo_list_entries);
+out_no_mem:
return ret;
}
@@ -1963,7 +1943,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* hanging. No point trying again.
*/
}
+
unlock_out:
+ untrack_invalid_user_pages(process_info);
mutex_unlock(&process_info->lock);
mmput(mm);
put_task_struct(usertask);
@@ -2130,3 +2112,88 @@ ttm_reserve_fail:
kfree(pd_bo_list);
return ret;
}
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+ int ret;
+
+ if (!info || !gws)
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -EINVAL;
+
+ mutex_init(&(*mem)->lock);
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+ (*mem)->process_info = process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+
+ /* Validate gws bo the first time it is added to process */
+ mutex_lock(&(*mem)->process_info->lock);
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ goto bo_reservation_failure;
+ }
+
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+ if (ret) {
+ pr_err("GWS BO validate failed %d\n", ret);
+ goto bo_validation_failure;
+ }
+ /* GWS resource is shared b/t amdgpu and amdkfd
+ * Add process eviction fence to bo so they can
+ * evict each other.
+ */
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ amdgpu_bo_unreserve(gws_bo);
+ mutex_unlock(&(*mem)->process_info->lock);
+
+ return ret;
+
+bo_validation_failure:
+ amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+ mutex_unlock(&(*mem)->process_info->lock);
+ amdgpu_sync_free(&(*mem)->sync);
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ *mem = NULL;
+ return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+ int ret;
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+ /* Remove BO from process's validate list so restore worker won't touch
+ * it anymore
+ */
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ //TODO add BO back to validate_list?
+ return ret;
+ }
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+ process_info->eviction_fence);
+ amdgpu_bo_unreserve(gws_bo);
+ amdgpu_sync_free(&kgd_mem->sync);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&kgd_mem->lock);
+ kfree(mem);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8e1150..d497467b7fc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -81,9 +81,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return -ENOMEM;
kref_init(&list->refcount);
- list->gds_obj = adev->gds.gds_gfx_bo;
- list->gws_obj = adev->gds.gws_gfx_bo;
- list->oa_obj = adev->gds.oa_gfx_bo;
+ list->gds_obj = NULL;
+ list->gws_obj = NULL;
+ list->oa_obj = NULL;
array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
- int user_invalidated;
+ bool user_invalidated;
};
struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2f6239b6be6f..d72cc583ebd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &bo->tbo;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
- p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@@ -542,14 +541,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
- /* Check if we have user pages and nobody bound the BO already */
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- lobj->user_pages) {
+ if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+ lobj->user_invalidated && lobj->user_pages) {
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
+
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
binding_userptr = true;
@@ -580,7 +579,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
- unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -616,79 +614,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
- while (1) {
- struct list_head need_pages;
-
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto error_free_pages;
- }
-
- INIT_LIST_HEAD(&need_pages);
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
- &e->user_invalidated) && e->user_pages) {
-
- /* We acquired a page array, but somebody
- * invalidated it. Free it and try again
- */
- release_pages(e->user_pages,
- bo->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- e->user_pages = NULL;
- }
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- !e->user_pages) {
- list_del(&e->tv.head);
- list_add(&e->tv.head, &need_pages);
-
- amdgpu_bo_unreserve(bo);
- }
+ /* Get userptr backing pages. If pages are updated after registered
+ * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+ * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+ */
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ bool userpage_invalidated = false;
+ int i;
+
+ e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!e->user_pages) {
+ DRM_ERROR("calloc failure\n");
+ return -ENOMEM;
}
- if (list_empty(&need_pages))
- break;
-
- /* Unreserve everything again. */
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
- /* We tried too many times, just abort */
- if (!--tries) {
- r = -EDEADLK;
- DRM_ERROR("deadlock in %s\n", __func__);
- goto error_free_pages;
+ r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
+ if (r) {
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ return r;
}
- /* Fill the page arrays for all userptrs. */
- list_for_each_entry(e, &need_pages, tv.head) {
- struct ttm_tt *ttm = e->tv.bo->ttm;
-
- e->user_pages = kvmalloc_array(ttm->num_pages,
- sizeof(struct page*),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- r = -ENOMEM;
- DRM_ERROR("calloc failure in %s\n", __func__);
- goto error_free_pages;
- }
-
- r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
- if (r) {
- DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
- kvfree(e->user_pages);
- e->user_pages = NULL;
- goto error_free_pages;
+ for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+ if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ userpage_invalidated = true;
+ break;
}
}
+ e->user_invalidated = userpage_invalidated;
+ }
- /* And try again. */
- list_splice(&need_pages, &p->validated);
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+ &duplicates);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+ goto out;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -757,17 +721,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- if (!e->user_pages)
- continue;
-
- release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
- kvfree(e->user_pages);
- }
-
+out:
return r;
}
@@ -1054,11 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
j++;
}
- /* UVD & VCE fw doesn't support user fences */
+ /* MM engine doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && (
- ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ if (parser->job->uf_addr && ring->funcs->no_user_fence)
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@@ -1328,7 +1280,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
-
int r;
job = p->job;
@@ -1338,15 +1289,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock */
+ /* No memory allocation is allowed while holding the mn lock.
+ * p->mn is hold until amdgpu_cs_submit is finished and fence is added
+ * to BOs.
+ */
amdgpu_mn_lock(p->mn);
+
+ /* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+ * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+ */
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- r = -ERESTARTSYS;
- goto error_abort;
- }
+ r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ }
+ if (r) {
+ r = -EAGAIN;
+ goto error_abort;
}
job->owner = p->filp;
@@ -1442,6 +1401,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9f282e971197..0ffa6733f2b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -97,6 +97,28 @@ static const char *amdgpu_asic_name[] = {
"LAST",
};
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs)
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+ amdgpu_device_get_pcie_replay_count, NULL);
+
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
@@ -910,8 +932,10 @@ def_value:
* Validates certain module parameters and updates
* the associated values used by the driver (all asics).
*/
-static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
+ int ret = 0;
+
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -956,12 +980,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_vram_page_split = 1024;
}
- if (amdgpu_lockup_timeout == 0) {
- dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
- amdgpu_lockup_timeout = 10000;
+ ret = amdgpu_device_get_job_timeout_settings(adev);
+ if (ret) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return ret;
}
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+ return ret;
}
/**
@@ -1505,12 +1532,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
+
+ /* query the reg access mode at the very beginning */
+ amdgpu_virt_init_reg_access_mode(adev);
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev))
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ /* Read BIOS */
+ if (!amdgpu_get_bios(adev))
+ return -EINVAL;
+
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ return r;
+ }
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1550,6 +1591,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
@@ -2473,7 +2515,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->lock_reset);
mutex_init(&adev->virt.dpm_mutex);
- amdgpu_device_check_arguments(adev);
+ r = amdgpu_device_check_arguments(adev);
+ if (r)
+ return r;
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
@@ -2558,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto fence_driver_init;
}
- /* Read BIOS */
- if (!amdgpu_get_bios(adev)) {
- r = -EINVAL;
- goto failed;
- }
-
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- goto failed;
- }
-
/* detect if we are with an SRIOV vbios */
amdgpu_device_detect_sriov_bios(adev);
@@ -2672,6 +2703,10 @@ fence_driver_init:
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r)
+ DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2712,7 +2747,13 @@ fence_driver_init:
}
/* must succeed. */
- amdgpu_ras_post_init(adev);
+ amdgpu_ras_resume(adev);
+
+ r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+ if (r) {
+ dev_err(adev->dev, "Could not create pcie_replay_count");
+ return r;
+ }
return 0;
@@ -2777,6 +2818,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
+ device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
+ amdgpu_ucode_sysfs_fini(adev);
}
@@ -2857,6 +2900,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
amdgpu_amdkfd_suspend(adev);
+ amdgpu_ras_suspend(adev);
+
r = amdgpu_device_ip_suspend_phase1(adev);
/* evict vram memory */
@@ -2977,6 +3022,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_kms_helper_poll_enable(dev);
+ amdgpu_ras_resume(adev);
+
/*
* Most of the connector probing functions try to acquire runtime pm
* refs to ensure that the GPU is powered on when connector polling is
@@ -3455,6 +3502,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
+ r = amdgpu_device_ip_late_init(tmp_adev);
+ if (r)
+ goto out;
+
+ /* must succeed. */
+ amdgpu_ras_resume(tmp_adev);
+
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@@ -3695,43 +3749,6 @@ skip_hw_reset:
return r;
}
-static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
- enum pci_bus_speed *speed,
- enum pcie_link_width *width)
-{
- struct pci_dev *pdev = adev->pdev;
- enum pci_bus_speed cur_speed;
- enum pcie_link_width cur_width;
- u32 ret = 1;
-
- *speed = PCI_SPEED_UNKNOWN;
- *width = PCIE_LNK_WIDTH_UNKNOWN;
-
- while (pdev) {
- cur_speed = pcie_get_speed_cap(pdev);
- cur_width = pcie_get_width_cap(pdev);
- ret = pcie_bandwidth_available(adev->pdev, NULL,
- NULL, &cur_width);
- if (!ret)
- cur_width = PCIE_LNK_WIDTH_RESRV;
-
- if (cur_speed != PCI_SPEED_UNKNOWN) {
- if (*speed == PCI_SPEED_UNKNOWN)
- *speed = cur_speed;
- else if (cur_speed < *speed)
- *speed = cur_speed;
- }
-
- if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
- if (*width == PCIE_LNK_WIDTH_UNKNOWN)
- *width = cur_width;
- else if (cur_width < *width)
- *width = cur_width;
- }
- pdev = pci_upstream_bridge(pdev);
- }
-}
-
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
@@ -3765,8 +3782,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
- &platform_link_width);
+ pcie_bandwidth_available(adev->pdev, NULL,
+ &platform_speed_cap, &platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b083b219b1a9..30e6ad8a90bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -631,10 +631,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
amdgpu_dither_enum_list, sz);
if (amdgpu_device_has_dc_support(adev)) {
- adev->mode_info.max_bpc_property =
- drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
- if (!adev->mode_info.max_bpc_property)
- return -ENOMEM;
adev->mode_info.abm_level_property =
drm_property_create_range(adev->ddev, 0,
"abm level", 0, 4);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index a38e0fb4a6fe..4711cf1b5bd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012 Advanced Micro Devices, Inc.
+ * Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -103,7 +103,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
* Returns:
* 0 on success or a negative error code on failure.
*/
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+ struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -137,57 +138,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
-/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
- * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
- *
- * Imports shared DMA buffer memory exported by another device.
- *
- * Returns:
- * A new GEM BO of the given DRM device, representing the memory
- * described by the given DMA-buf attachment and scatter/gather table.
- */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg)
-{
- struct reservation_object *resv = attach->dmabuf->resv;
- struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_bo *bo;
- struct amdgpu_bo_param bp;
- int ret;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = attach->dmabuf->size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_CPU;
- bp.flags = 0;
- bp.type = ttm_bo_type_sg;
- bp.resv = resv;
- ww_mutex_lock(&resv->lock, NULL);
- ret = amdgpu_bo_create(adev, &bp, &bo);
- if (ret)
- goto error;
-
- bo->tbo.sg = sg;
- bo->tbo.ttm->sg = sg;
- bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
- bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
-
- ww_mutex_unlock(&resv->lock);
- return &bo->gem_base;
-
-error:
- ww_mutex_unlock(&resv->lock);
- return ERR_PTR(ret);
-}
-
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
@@ -231,7 +181,7 @@ err_fences_put:
}
/**
- * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
+ * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
@@ -242,8 +192,8 @@ err_fences_put:
* Returns:
* 0 on success or a negative error code on failure.
*/
-static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -291,15 +241,15 @@ error_detach:
}
/**
- * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
+ * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
-static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -334,7 +284,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
}
/**
- * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
@@ -345,8 +295,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
* Returns:
* 0 on success or a negative error code on failure.
*/
-static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
- enum dma_data_direction direction)
+static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -374,12 +324,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .attach = amdgpu_gem_map_attach,
- .detach = amdgpu_gem_map_detach,
+ .attach = amdgpu_dma_buf_map_attach,
+ .detach = amdgpu_dma_buf_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
- .begin_cpu_access = amdgpu_gem_begin_cpu_access,
+ .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
@@ -418,6 +368,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
}
/**
+ * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
+ * implementation
+ * @dev: DRM device
+ * @attach: DMA-buf attachment
+ * @sg: Scatter/gather table
+ *
+ * Imports shared DMA buffer memory exported by another device.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sg)
+{
+ struct reservation_object *resv = attach->dmabuf->resv;
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
+ int ret;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = attach->dmabuf->size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = resv;
+ ww_mutex_lock(&resv->lock, NULL);
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ bo->prime_shared_count = 1;
+
+ ww_mutex_unlock(&resv->lock);
+ return &bo->gem_base;
+
+error:
+ ww_mutex_unlock(&resv->lock);
+ return ERR_PTR(ret);
+}
+
+/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device
* @dma_buf: Shared DMA buffer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000000..c7056cbe8685
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DMA_BUF_H__
+#define __AMDGPU_DMA_BUF_H__
+
+#include <drm/drm_gem.h>
+
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *gobj,
+ int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+ struct vm_area_struct *vma);
+
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..521dbd0d9af8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
int min_temp;
/* high temperature threshold */
int max_temp;
+ /* edge max emergency(shutdown) temp */
+ int max_edge_emergency_temp;
+ /* hotspot low temperature threshold */
+ int min_hotspot_temp;
+ /* hotspot high temperature critical threshold */
+ int max_hotspot_crit_temp;
+ /* hotspot max emergency(shutdown) temp */
+ int max_hotspot_emergency_temp;
+ /* memory low temperature threshold */
+ int min_mem_temp;
+ /* memory high temperature critical threshold */
+ int max_mem_crit_temp;
+ /* memory max emergency(shutdown) temp */
+ int max_mem_emergency_temp;
/* was last interrupt low to high or high to low */
bool high_to_low;
/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1e2cc9d68a05..1f38d6fc1fe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -36,7 +36,7 @@
#include "amdgpu.h"
#include "amdgpu_irq.h"
-#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
#include "amdgpu_amdkfd.h"
@@ -81,6 +81,8 @@
#define KMS_DRIVER_MINOR 32
#define KMS_DRIVER_PATCHLEVEL 0
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
+
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@@ -93,7 +95,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
-int amdgpu_lockup_timeout = 10000;
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@@ -227,12 +229,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
- * DOC: lockup_timeout (int)
- * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
- * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
- */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
-module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
+ * DOC: lockup_timeout (string)
+ * Set GPU scheduler timeout value in ms.
+ *
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
+ * multiple values specified. 0 and negative values are invalidated. They will be adjusted
+ * to default timeout.
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX. The second one is for Compute.
+ * And the third and fourth ones are for SDMA and Video.
+ * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
+ * jobs is 10000. And there is no timeout enforced on compute jobs.
+ */
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
+ "format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
@@ -655,6 +666,16 @@ MODULE_PARM_DESC(noretry,
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Whether HWS support gws barriers. Default value: false (not supported)
+ * This will be replaced with a MEC firmware version check once firmware
+ * is ready
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
#endif
/**
@@ -1216,6 +1237,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /*
+ * By default timeout for non compute jobs is 10000.
+ * And there is no timeout enforced on compute jobs.
+ */
+ adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
+ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ /* Invalidate 0 and negative values */
+ if (timeout <= 0) {
+ index++;
+ continue;
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+ /*
+ * There is only one value specified and
+ * it should apply to all non-compute jobs.
+ */
+ if (index == 1)
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ }
+
+ return ret;
+}
+
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
@@ -1230,7 +1307,8 @@ static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_GEM |
- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
+ DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,
.postclose = amdgpu_driver_postclose_kms,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4dee2326b29c..3a483f7e89c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -427,9 +427,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission)
{
+ struct amdgpu_device *adev = ring->adev;
long timeout;
int r;
+ if (!adev)
+ return -EINVAL;
+
/* Check that num_hw_submission is a power of two */
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
return -EINVAL;
@@ -451,12 +455,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
/* No need to setup the GPU scheduler for KIQ ring */
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
- /* for non-sriov case, no timeout enforce on compute ring */
- if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
- && !amdgpu_sriov_vf(ring->adev))
- timeout = MAX_SCHEDULE_TIMEOUT;
- else
- timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ /*
+ * For non-sriov case, no timeout enforce
+ * on compute ring by default. Unless user
+ * specifies a timeout for compute ring.
+ *
+ * For sriov case, always use the timeout
+ * as gfx ring
+ */
+ if (!amdgpu_sriov_vf(ring->adev))
+ timeout = adev->compute_timeout;
+ else
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f5734d985..dad2186f4ed5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -27,26 +27,11 @@
struct amdgpu_ring;
struct amdgpu_bo;
-struct amdgpu_gds_asic_info {
- uint32_t total_size;
- uint32_t gfx_partition_size;
- uint32_t cs_partition_size;
-};
-
struct amdgpu_gds {
- struct amdgpu_gds_asic_info mem;
- struct amdgpu_gds_asic_info gws;
- struct amdgpu_gds_asic_info oa;
+ uint32_t gds_size;
+ uint32_t gws_size;
+ uint32_t oa_size;
uint32_t gds_compute_max_wave_id;
-
- /* At present, GDS, GWS and OA resources for gfx (graphics)
- * is always pre-allocated and available for graphics operation.
- * Such resource is shared between all gfx clients.
- * TODO: move this operation to user space
- * */
- struct amdgpu_bo* gds_gfx_bo;
- struct amdgpu_bo* gws_gfx_bo;
- struct amdgpu_bo* oa_gfx_bo;
};
struct amdgpu_gds_reg_offset {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d4fcf5475464..7b840367004c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -330,26 +330,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_reserve(bo, true);
if (r)
- goto free_pages;
+ goto user_pages_done;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
- goto free_pages;
+ goto user_pages_done;
}
r = drm_gem_handle_create(filp, gobj, &handle);
- /* drop reference from allocate - handle holds it now */
- drm_gem_object_put_unlocked(gobj);
if (r)
- return r;
+ goto user_pages_done;
args->handle = handle;
- return 0;
-free_pages:
- release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
release_object:
drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f1ddfc50bcc7..b8ba6e27c61f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gobj,
- int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
- struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
-extern const struct dma_buf_ops amdgpu_dmabuf_ops;
/*
* GEM objects.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a17fb1af204..7ab1241bd9e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
+ else
+ drm_sched_suspend_timeout(&ring->sched);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b17d0545728e..edb675103bd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -590,13 +590,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
- gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
- gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
- gds_info.gds_total_size = adev->gds.mem.total_size;
- gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
- gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
- gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
- gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
+ gds_info.compute_partition_size = adev->gds.gds_size;
+ gds_info.gds_total_size = adev->gds.gds_size;
+ gds_info.gws_per_compute_partition = adev->gds.gws_size;
+ gds_info.oa_per_compute_partition = adev->gds.oa_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 58ed401c5996..41ccee49a224 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>
@@ -58,14 +58,12 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
- * @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
* @lock: rw semaphore protecting the notifier nodes
* @objects: interval tree containing amdgpu_mn_nodes
- * @read_lock: mutex for recursive locking of @lock
- * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@@ -73,7 +71,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
- struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@@ -85,8 +82,9 @@ struct amdgpu_mn {
/* objects protected by lock */
struct rw_semaphore lock;
struct rb_root_cached objects;
- struct mutex read_lock;
- atomic_t recursion;
+
+ /* HMM mirror */
+ struct hmm_mirror mirror;
};
/**
@@ -103,7 +101,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
if (blockable)
- mutex_lock(&amn->read_lock);
- else if (!mutex_trylock(&amn->read_lock))
+ down_read(&amn->lock);
+ else if (!down_read_trylock(&amn->lock))
return -EAGAIN;
- if (atomic_inc_return(&amn->recursion) == 1)
- down_read_non_owner(&amn->lock);
- mutex_unlock(&amn->read_lock);
-
return 0;
}
@@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
*/
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
- if (atomic_dec_return(&amn->recursion) == 0)
- up_read_non_owner(&amn->lock);
+ up_read(&amn->lock);
}
/**
@@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
true, false, MAX_SCHEDULE_TIMEOUT);
if (r <= 0)
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
- amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
}
}
/**
- * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
/* TODO we should be able to split locking for interval tree and
* amdgpu_mn_invalidate_node
*/
- if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
- if (!mmu_notifier_range_blockable(range)) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
- amdgpu_mn_invalidate_node(node, range->start, end);
+ amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
- * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
- if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
- if (!mmu_notifier_range_blockable(range)) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- range->start,
- end))
- amdgpu_amdkfd_evict_userptr(mem, range->mm);
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
- mutex_init(&amn->read_lock);
- atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -432,7 +404,7 @@ free_amn:
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
@@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mutex_unlock(&adev->mn_lock);
}
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+ if (range) {
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->pfns = NULL;
+ INIT_LIST_HEAD(&range->list);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..f5b67c63ed6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,22 +25,24 @@
#define __AMDGPU_MN_H__
/*
- * MMU Notifier
+ * HMM mirror
*/
struct amdgpu_mn;
+struct hmm_range;
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
@@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
return -ENODEV;
}
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 2e9e3db778c6..eb9975f4decb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -331,8 +331,6 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
- /* maximum number of bits per channel for monitor color */
- struct drm_property *max_bpc_property;
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 34471dbaa872..a73e1903d29b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -67,6 +67,15 @@ static const struct cg_flag_name clocks[] = {
{0, NULL},
};
+static const struct hwmon_temp_label {
+ enum PP_HWMON_TEMP channel;
+ const char *label;
+} temp_label[] = {
+ {PP_TEMP_EDGE, "edge"},
+ {PP_TEMP_JUNCTION, "junction"},
+ {PP_TEMP_MEM, "mem"},
+};
+
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
{
if (adev->pm.dpm_enabled) {
@@ -758,7 +767,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
- if (adev->powerplay.pp_funcs->set_ppfeature_status) {
+ if (is_support_sw_smu(adev)) {
+ ret = smu_set_ppfeature_status(&adev->smu, featuremask);
+ if (ret)
+ return -EINVAL;
+ } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
if (ret)
return -EINVAL;
@@ -774,7 +787,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- if (adev->powerplay.pp_funcs->get_ppfeature_status)
+ if (is_support_sw_smu(adev)) {
+ return smu_get_ppfeature_status(&adev->smu, buf);
+ } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
return amdgpu_dpm_get_ppfeature_status(adev, buf);
return snprintf(buf, PAGE_SIZE, "\n");
@@ -1303,6 +1318,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
}
/**
+ * DOC: mem_busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the VRAM
+ * is as a percentage. The file mem_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
+static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int r, value, size = sizeof(value);
+
+ /* read the IP busy sensor */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
+ (void *)&value, &size);
+
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+/**
* DOC: pcie_bw
*
* The amdgpu driver provides a sysfs API for estimating how much data
@@ -1327,6 +1368,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
count0, count1, pcie_get_mps(adev->pdev));
}
+/**
+ * DOC: unique_id
+ *
+ * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
+ * The file unique_id is used for this.
+ * This will provide a Unique ID that will persist from machine to machine
+ *
+ * NOTE: This will only work for GFX9 and newer. This file will be absent
+ * on unsupported ASICs (GFX8 and older)
+ */
+static ssize_t amdgpu_get_unique_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ if (adev->unique_id)
+ return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
+
+ return 0;
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -1371,10 +1435,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
+static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
+ amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
amdgpu_get_ppfeature_status,
amdgpu_set_ppfeature_status);
+static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1382,6 +1449,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
+ int channel = to_sensor_dev_attr(attr)->index;
int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
@@ -1389,11 +1457,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- /* get the temperature */
- r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
- (void *)&temp, &size);
- if (r)
- return r;
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ /* get current junction temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_EDGE:
+ /* get current edge temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_MEM:
+ /* get current memory temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ }
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -1414,6 +1503,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
+static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_hotspot_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_mem_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_mem_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int channel = to_sensor_dev_attr(attr)->index;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int channel = to_sensor_dev_attr(attr)->index;
+ int temp = 0;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+ break;
+ case PP_TEMP_EDGE:
+ temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+ break;
+ case PP_TEMP_MEM:
+ temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+ break;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1983,11 +2142,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ * - temp2_input and temp3_input are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_label: temperature channel label
+ * - temp2_label and temp3_label are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
*
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
*
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ * - these are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage:
*
@@ -2035,9 +2203,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2064,6 +2244,18 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp1_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp2_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp3_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &sensor_dev_attr_temp2_label.dev_attr.attr,
+ &sensor_dev_attr_temp3_label.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2186,6 +2378,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
+ /* only SOC15 dGPUs support hotspot and mem temperatures */
+ if (((adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_VEGA10) &&
+ (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+ return 0;
+
return effective_mode;
}
@@ -2612,6 +2820,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
+ /* APU does not have its own dedicated memory */
+ if (!(adev->flags & AMD_IS_APU)) {
+ ret = device_create_file(adev->dev,
+ &dev_attr_mem_busy_percent);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "mem_busy_percent\n");
+ return ret;
+ }
+ }
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@@ -2620,6 +2838,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
+ if (adev->unique_id)
+ ret = device_create_file(adev->dev, &dev_attr_unique_id);
+ if (ret) {
+ DRM_ERROR("failed to create device file unique_id\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2678,7 +2902,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (!(adev->flags & AMD_IS_APU))
+ device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
+ if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
+ if (adev->unique_id)
+ device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2775,6 +3003,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
/* GPU Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
+ /* MEM Load */
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
+ seq_printf(m, "MEM Load: %u %%\n", value);
+
seq_printf(m, "\n");
/* SMC feature mask */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 86cc24b2e0aa..af9835c8395d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
return ret;
}
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t id, uint32_t value)
+{
+ cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+ cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+ cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value)
+{
+ struct psp_gfx_cmd_resp *cmd = NULL;
+ int ret = 0;
+
+ if (reg >= PSP_REG_LAST)
+ return -EINVAL;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+ return ret;
+}
+
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
uint32_t xgmi_ta_size, uint32_t shared_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f07c96..cf49539b0b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
uint32_t ring_size;
};
+/* More registers may will be supported */
+enum psp_reg_prog_id {
+ PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
+ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
+ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_LAST
+};
+
struct psp_funcs
{
int (*init_microcode)(struct psp_context *psp);
@@ -95,12 +103,26 @@ struct psp_funcs
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
};
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+ uint32_t num_nodes;
+ struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
+ struct psp_xgmi_topology_info top_info;
};
struct psp_ras_context {
@@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
-struct psp_xgmi_node_info {
- uint64_t node_id;
- uint8_t num_hops;
- uint8_t is_sharing_enabled;
- enum ta_xgmi_assigned_sdma_engine sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
- uint32_t num_nodes;
- struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
-
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..7c8a4aedf07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -90,6 +90,12 @@ struct ras_manager {
struct ras_err_data err_data;
};
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
const char *ras_error_string[] = {
"none",
"parity",
@@ -118,7 +124,8 @@ const char *ras_block_string[] = {
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define ras_block_str(i) (ras_block_string[i])
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
@@ -237,8 +244,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
-/*
- * DOC: ras debugfs control interface
+/**
+ * DOC: AMDGPU RAS debugfs control interface
*
* It accepts struct ras_debug_if who has two members.
*
@@ -521,6 +528,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
+ if (ret == TA_RAS_STATUS__RESET_NEEDED)
+ return -EAGAIN;
return -EINVAL;
}
@@ -541,16 +550,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
return -EINVAL;
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
- /* If ras is enabled by vbios, we set up ras object first in
- * both case. For enable, that is all what we need do. For
- * disable, we need perform a ras TA disable cmd after that.
- */
- ret = __amdgpu_ras_feature_enable(adev, head, 1);
- if (ret)
- return ret;
+ if (enable) {
+ /* There is no harm to issue a ras TA cmd regardless of
+ * the currecnt ras state.
+ * If current state == target state, it will do nothing
+ * But sometimes it requests driver to reset and repost
+ * with error code -EAGAIN.
+ */
+ ret = amdgpu_ras_feature_enable(adev, head, 1);
+ /* With old ras TA, we might fail to enable ras.
+ * Log it and just setup the object.
+ * TODO need remove this WA in the future.
+ */
+ if (ret == -EINVAL) {
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (!ret)
+ DRM_INFO("RAS INFO: %s setup object\n",
+ ras_block_str(head->block));
+ }
+ } else {
+ /* setup the object then issue a ras TA disable cmd.*/
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (ret)
+ return ret;
- if (!enable)
ret = amdgpu_ras_feature_enable(adev, head, 0);
+ }
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -691,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+ switch (flags) {
+ case 0:
+ return "R";
+ case 1:
+ return "P";
+ case 2:
+ default:
+ return "F";
+ };
+}
+
+/*
+ * DOC: ras sysfs gpu_vram_bad_pages interface
+ *
+ * It allows user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of below character,
+ * R: reserved, this gpu page is reserved and not able to use.
+ * P: pending for reserve, this gpu page is marked as bad, will be reserved
+ * in next window of page_reserve.
+ * F: unable to reserve. this gpu page can't be reserved due to some reasons.
+ *
+ * examples:
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+ struct kobject *kobj, struct bin_attribute *attr,
+ char *buf, loff_t ppos, size_t count)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, badpages_attr);
+ struct amdgpu_device *adev = con->adev;
+ const unsigned int element_size =
+ sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+ unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+ unsigned int end = div64_ul(ppos + count - 1, element_size);
+ ssize_t s = 0;
+ struct ras_badpage *bps = NULL;
+ unsigned int bps_count = 0;
+
+ memset(buf, 0, count);
+
+ if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ return 0;
+
+ for (; start < end && start < bps_count; start++)
+ s += scnprintf(&buf[s], element_size + 1,
+ "0x%08x : 0x%08x : %1s\n",
+ bps[start].bp,
+ bps[start].size,
+ amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+ kfree(bps);
+
+ return s;
+}
+
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -731,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+ struct bin_attribute *bin_attrs[] = {
+ &con->badpages_attr,
+ NULL
+ };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+ .bin_attrs = bin_attrs,
};
con->features_attr = (struct device_attribute) {
@@ -743,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
},
.show = amdgpu_ras_sysfs_features_read,
};
+
+ con->badpages_attr = (struct bin_attribute) {
+ .attr = {
+ .name = "gpu_vram_bad_pages",
+ .mode = S_IRUGO,
+ },
+ .size = 0,
+ .private = NULL,
+ .read = amdgpu_ras_sysfs_badpages_read,
+ };
+
sysfs_attr_init(attrs[0]);
+ sysfs_bin_attr_init(bin_attrs[0]);
return sysfs_create_group(&adev->dev->kobj, &group);
}
@@ -755,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+ struct bin_attribute *bin_attrs[] = {
+ &con->badpages_attr,
+ NULL
+ };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+ .bin_attrs = bin_attrs,
};
sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1089,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int i = 0;
+ int ret = 0;
+
+ if (!con || !con->eh_data || !bps || !count)
+ return -EINVAL;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data || data->count == 0) {
+ *bps = NULL;
+ goto out;
+ }
+
+ *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+ if (!*bps) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (; i < data->count; i++) {
+ (*bps)[i] = (struct ras_badpage){
+ .bp = data->bps[i].bp,
+ .size = AMDGPU_GPU_PAGE_SIZE,
+ .flags = 0,
+ };
+
+ if (data->last_reserved <= i)
+ (*bps)[i].flags = 1;
+ else if (data->bps[i].bo == NULL)
+ (*bps)[i].flags = 2;
+ }
+
+ *count = data->count;
+out:
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
@@ -1340,6 +1505,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
}
/* recovery end */
+/* return 0 if ras will reset gpu and repost.*/
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!ras)
+ return -EINVAL;
+
+ ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+ return 0;
+}
+
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initializtion and
@@ -1415,8 +1593,10 @@ recovery_out:
return -EINVAL;
}
-/* do some init work after IP late init as dependence */
-void amdgpu_ras_post_init(struct amdgpu_device *adev)
+/* do some init work after IP late init as dependence.
+ * and it runs in resume/gpu reset/booting up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
@@ -1444,6 +1624,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
}
}
}
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
+ con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+ /* setup ras obj state as disabled.
+ * for init_by_vbios case.
+ * if we want to enable ras, just enable it in a normal way.
+ * If we want do disable it, need setup ras obj as enabled,
+ * then issue another TA disable cmd.
+ * See feature_enable_on_boot
+ */
+ amdgpu_ras_disable_all_features(adev, 1);
+ amdgpu_ras_reset_gpu(adev, 0);
+ }
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return;
+
+ amdgpu_ras_disable_all_features(adev, 0);
+ /* Make sure all ras objects are disabled. */
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 1);
}
/* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index eaef5edefc34..c6b34fbd695f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
+ struct bin_attribute badpages_attr;
/* block array */
struct ras_manager *objs;
@@ -175,6 +176,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block));
}
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+ unsigned int block);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce);
@@ -187,13 +194,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
bool is_baco)
{
- /* remove me when gpu reset works on vega20 A1. */
-#if 0
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
schedule_work(&ras->recovery_work);
-#endif
return 0;
}
@@ -255,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
-void amdgpu_ras_post_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d7fae2676269..cdddce938bf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
uint32_t align_mask;
u32 nop;
bool support_64bit_ptrs;
+ bool no_user_fence;
unsigned vmhub;
unsigned extra_dw;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..7138dc1dd1f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
+#include <linux/hmm.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@@ -703,143 +704,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
/*
* TTM backend functions.
*/
-struct amdgpu_ttm_gup_task_list {
- struct list_head list;
- struct task_struct *task;
-};
-
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
uint32_t userflags;
- spinlock_t guptasklock;
- struct list_head guptasks;
- atomic_t mmu_invalidations;
- uint32_t last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ struct hmm_range *ranges;
+ int nr_ranges;
+#endif
};
/**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
*
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
*/
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS (16)
+
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
- int r;
+ unsigned long start = gtt->userptr;
+ unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+ struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+ struct hmm_range *ranges;
+ unsigned long nr_pages, i;
+ uint64_t *pfns, f;
+ int r = 0;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
-
down_read(&mm->mmap_sem);
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /*
- * check that we only use anonymous memory to prevent problems
- * with writeback
- */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
+ /* user pages may cross multiple VMAs */
+ gtt->nr_ranges = 0;
+ do {
+ unsigned long vm_start;
- vma = find_vma(mm, gtt->userptr);
- if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&mm->mmap_sem);
- return -EPERM;
+ if (gtt->nr_ranges >= MAX_NR_VMAS) {
+ DRM_ERROR("Too many VMAs in userptr range\n");
+ r = -EFAULT;
+ goto out;
}
+
+ vm_start = vma ? vma->vm_end : start;
+ vma = find_vma(mm, vm_start);
+ if (unlikely(!vma || vm_start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out;
+ }
+ vmas[gtt->nr_ranges++] = vma;
+ } while (end > vma->vm_end);
+
+ DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+ start, gtt->nr_ranges, ttm->num_pages);
+
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vmas[0]->vm_file)) {
+ r = -EPERM;
+ goto out;
}
- /* loop enough times using contiguous pages of memory */
- do {
- unsigned num_pages = ttm->num_pages - pinned;
- uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **p = pages + pinned;
- struct amdgpu_ttm_gup_task_list guptask;
+ ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+ if (unlikely(!ranges)) {
+ r = -ENOMEM;
+ goto out;
+ }
- guptask.task = current;
- spin_lock(&gtt->guptasklock);
- list_add(&guptask.list, &gtt->guptasks);
- spin_unlock(&gtt->guptasklock);
+ pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_ranges;
+ }
- if (mm == current->mm)
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
- else
- r = get_user_pages_remote(gtt->usertask,
- mm, userptr, num_pages,
- flags, p, NULL, NULL);
+ for (i = 0; i < gtt->nr_ranges; i++)
+ amdgpu_hmm_init_range(&ranges[i]);
- spin_lock(&gtt->guptasklock);
- list_del(&guptask.list);
- spin_unlock(&gtt->guptasklock);
+ f = ranges[0].flags[HMM_PFN_VALID];
+ f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+ 0 : ranges[0].flags[HMM_PFN_WRITE];
+ memset64(pfns, f, ttm->num_pages);
- if (r < 0)
- goto release_pages;
+ for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+ ranges[i].vma = vmas[i];
+ ranges[i].start = max(start, vmas[i]->vm_start);
+ ranges[i].end = min(end, vmas[i]->vm_end);
+ ranges[i].pfns = pfns + nr_pages;
+ nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
- pinned += r;
+ r = hmm_vma_fault(&ranges[i], true);
+ if (unlikely(r))
+ break;
+ }
+ if (unlikely(r)) {
+ while (i--)
+ hmm_vma_range_done(&ranges[i]);
- } while (pinned < ttm->num_pages);
+ goto out_free_pfns;
+ }
up_read(&mm->mmap_sem);
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
+ if (!pages[i]) {
+ pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+ i, pfns[i]);
+ goto out_invalid_pfn;
+ }
+ }
+ gtt->ranges = ranges;
+
return 0;
-release_pages:
- release_pages(pages, pinned);
+out_free_pfns:
+ kvfree(pfns);
+out_free_ranges:
+ kvfree(ranges);
+out:
up_read(&mm->mmap_sem);
+
return r;
+
+out_invalid_pfn:
+ for (i = 0; i < gtt->nr_ranges; i++)
+ hmm_vma_range_done(&ranges[i]);
+ kvfree(pfns);
+ kvfree(ranges);
+ return -ENOMEM;
}
/**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
*
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are still valid
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
+ bool r = false;
+ int i;
- gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
- for (i = 0; i < ttm->num_pages; ++i) {
- if (ttm->pages[i])
- put_page(ttm->pages[i]);
+ if (!gtt || !gtt->userptr)
+ return false;
- ttm->pages[i] = pages ? pages[i] : NULL;
+ DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
+ gtt->userptr, gtt->nr_ranges, ttm->num_pages);
+
+ WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
+ "No user pages to check\n");
+
+ if (gtt->ranges) {
+ for (i = 0; i < gtt->nr_ranges; i++)
+ r |= hmm_vma_range_done(&gtt->ranges[i]);
+ kvfree(gtt->ranges[0].pfns);
+ kvfree(gtt->ranges);
+ gtt->ranges = NULL;
}
+
+ return r;
}
+#endif
/**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
*
- * Called while unpinning userptr pages
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
*/
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
-
- for (i = 0; i < ttm->num_pages; ++i) {
- struct page *page = ttm->pages[i];
+ unsigned long i;
- if (!page)
- continue;
-
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- set_page_dirty(page);
-
- mark_page_accessed(page);
- }
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i] = pages ? pages[i] : NULL;
}
/**
@@ -901,10 +950,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
/* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- /* mark the pages as dirty */
- amdgpu_ttm_tt_mark_user_pages(ttm);
-
sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ if (gtt->ranges &&
+ ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
+ gtt->ranges[0].pfns[0]))
+ WARN_ONCE(1, "Missing get_user_page_done\n");
+#endif
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1254,11 +1307,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->usertask = current->group_leader;
get_task_struct(gtt->usertask);
- spin_lock_init(&gtt->guptasklock);
- INIT_LIST_HEAD(&gtt->guptasks);
- atomic_set(&gtt->mmu_invalidations, 0);
- gtt->last_set_pages = 0;
-
return 0;
}
@@ -1287,7 +1335,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1347,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
- /* Search the lists of tasks that hold this mapping and see
- * if current is one of them. If it is return false.
- */
- spin_lock(&gtt->guptasklock);
- list_for_each_entry(entry, &gtt->guptasks, list) {
- if (entry->task == current) {
- spin_unlock(&gtt->guptasklock);
- return false;
- }
- }
- spin_unlock(&gtt->guptasklock);
-
- atomic_inc(&gtt->mmu_invalidations);
-
return true;
}
/**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
- int *last_invalidated)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int prev_invalidated = *last_invalidated;
-
- *last_invalidated = atomic_read(&gtt->mmu_invalidations);
- return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
*/
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL || !gtt->userptr)
return false;
- return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+ return true;
}
/**
@@ -1753,44 +1772,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Initialize various on-chip memory pools */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
- adev->gds.mem.total_size);
+ adev->gds.gds_size);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- 4, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
- adev->gds.gws.total_size);
+ adev->gds.gws_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
- adev->gds.oa.total_size);
+ adev->gds.oa_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..c2b7669004ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+#else
+static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+{
+ return -EPERM;
+}
+static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+ return false;
+}
+#endif
+
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7b33867036e7..33c1eb76c076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
}
+#define FW_VERSION_ATTR(name, mode, field) \
+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct drm_device *ddev = dev_get_drvdata(dev); \
+ struct amdgpu_device *adev = ddev->dev_private; \
+ \
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
+} \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+
+static struct attribute *fw_attrs[] = {
+ &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
+ &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
+ &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
+ &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
+ &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
+ &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
+ &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
+ &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
+ &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
+ &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, NULL
+};
+
+static const struct attribute_group fw_attr_group = {
+ .name = "fw_version",
+ .attrs = fw_attrs
+};
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+ return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+ sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+}
+
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 7ac25a1c7853..ec4c2ea1f05a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index ecf6f96df2ad..118451f5e3aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -212,132 +212,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
return 0;
}
-static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
-{
- int ret_code;
- uint32_t reg_data = 0;
- uint32_t reg_data2 = 0;
- struct amdgpu_ring *ring;
-
- /* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
- DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
- new_state->fw_based, new_state->jpeg);
-
- reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
- (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
-
- if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
-
- if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
- if (!ret_code) {
- /* pause DPG non-jpeg */
- reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
-
- /* Restore */
- ring = &adev->vcn.ring_enc[0];
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-
- ring = &adev->vcn.ring_enc[1];
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-
- ring = &adev->vcn.ring_dec;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
- }
- } else {
- /* unpause dpg non-jpeg, no need to wait */
- reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- }
- adev->vcn.pause_state.fw_based = new_state->fw_based;
- }
-
- /* pause/unpause if state is changed */
- if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
- DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
- new_state->fw_based, new_state->jpeg);
-
- reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
- (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
-
- if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
-
- if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
- if (!ret_code) {
- /* Make sure JPRG Snoop is disabled before sending the pause */
- reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
- reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
-
- /* pause DPG jpeg */
- reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
- UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
-
- /* Restore */
- ring = &adev->vcn.ring_jpeg;
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
- UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
- ring = &adev->vcn.ring_dec;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
- }
- } else {
- /* unpause dpg jpeg, no need to wait */
- reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- }
- adev->vcn.pause_state.jpeg = new_state->jpeg;
- }
-
- return 0;
-}
-
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
@@ -362,7 +236,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, &new_state);
}
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
@@ -417,7 +291,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, &new_state);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a0ad19af9080..a1ee19251aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,27 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+ do { \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
@@ -81,6 +102,8 @@ struct amdgpu_vcn {
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
struct dpg_pause_state pause_state;
+ int (*pause_dpg_mode)(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state);
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7d484fad3909..1f0bd4d16475 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -426,3 +426,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
return clk;
}
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->init_reg_access_mode)
+ virt->ops->init_reg_access_mode(adev);
+}
+
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
+ ret = true;
+
+ return ret;
+}
+
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
+ && !(amdgpu_sriov_runtime(adev)))
+ ret = true;
+
+ return ret;
+}
+
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
+ ret = true;
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 584947b7ccf3..dca25deee75c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
+/* According to the fw feature, some new reg access modes are supported */
+#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
+#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
+#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
+#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
+
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
+ void (*init_reg_access_mode)(struct amdgpu_device *adev);
};
/*
@@ -258,6 +265,7 @@ struct amdgpu_virt {
uint32_t gim_feature;
/* protect DPM events to GIM */
struct mutex dpm_mutex;
+ uint32_t reg_access_mode;
};
#define amdgpu_sriov_enabled(adev) \
@@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a48c84c51775..d11eba09eadd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
return &hive->device_list;
}
+/**
+ * DOC: AMDGPU XGMI Support
+ *
+ * XGMI is a high speed interconnect that joins multiple GPU cards
+ * into a homogeneous memory space that is organized by a collective
+ * hive ID and individual node IDs, both of which are 64-bit numbers.
+ *
+ * The file xgmi_device_id contains the unique per GPU device ID and
+ * is stored in the /sys/class/drm/card${cardno}/device/ directory.
+ *
+ * Inside the device directory a sub-directory 'xgmi_hive_info' is
+ * created which contains the hive ID and the list of nodes.
+ *
+ * The hive ID is stored in:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
+ *
+ * The node information is stored in numbered directories:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
+ *
+ * Each device has their own xgmi_hive_info direction with a mirror
+ * set of node sub-directories.
+ *
+ * The XGMI memory space is built by contiguously adding the power of
+ * two padded VRAM space from each node to each other.
+ *
+ */
+
+
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
hive->number_devices,
- &hive->topology_info);
+ &adev->psp.xgmi_context.top_info);
if (ret)
dev_err(adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
return ret;
}
+
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_hops;
+ return -EINVAL;
+}
+
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
- struct psp_xgmi_topology_info *hive_topology;
+ struct psp_xgmi_topology_info *top_info;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev = NULL;
@@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
- hive_topology = &hive->topology_info;
+ top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
- hive_topology->nodes[count++].node_id = entry->node_id;
+ top_info->nodes[count++].node_id = entry->node_id;
+ top_info->num_nodes = count;
hive->number_devices = count;
- /* Each psp need to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+ /* update node list for other device in the hive */
+ if (tmp_adev != adev) {
+ top_info = &tmp_adev->psp.xgmi_context.top_info;
+ top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
+ top_info->num_nodes = count;
+ }
+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (ret)
+ goto exit;
+ }
+
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
- break;
+ goto exit;
}
}
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
- if (ret)
- break;
- }
-
if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
+
+ mutex_unlock(&hive->hive_lock);
+exit:
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
@@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
ret);
-
- mutex_unlock(&hive->hive_lock);
-exit:
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 3e9c91e9a4bf..fbcee31788c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
- struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock, reset_lock;
struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07c1f239e9c3..3a4f20766a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1804,6 +1804,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1821,6 +1833,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
+ .get_pcie_replay_count = &cik_get_pcie_replay_count,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index d5ebe566809b..8c09bf994acd 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_DF_MGCG;
}
+/* hold counter assignment per gpu struct */
+struct df_v3_6_event_mask {
+ struct amdgpu_device gpu;
+ uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
+};
+
+/* get assigned df perfmon ctr as int */
+static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int *counter)
+{
+ struct df_v3_6_event_mask *mask;
+ int i;
+
+ mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+ for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
+ if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
+ *counter = i;
+ return;
+ }
+ }
+}
+
+/* get address based on counter assignment */
+static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
+ uint64_t config,
+ int is_ctrl,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+
+ int target_cntr = -1;
+
+ df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+ if (target_cntr < 0)
+ return;
+
+ switch (target_cntr) {
+
+ case 0:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+ break;
+ case 1:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+ break;
+ case 2:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+ break;
+ case 3:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+ break;
+
+ }
+
+}
+
+/* get read counter address */
+static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
+}
+
+/* get control counter settings i.e. address and values to set */
+static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr,
+ uint32_t *lo_val,
+ uint32_t *hi_val)
+{
+
+ uint32_t eventsel, instance, unitmask;
+ uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;
+
+ df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
+
+ if (lo_val == NULL || hi_val == NULL)
+ return;
+
+ if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+ DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
+ *lo_base_addr, *hi_base_addr);
+ return;
+ }
+
+ eventsel = GET_EVENT(config);
+ instance = GET_INSTANCE(config);
+ unitmask = GET_UNITMASK(config);
+
+ es_5_0 = eventsel & 0x3FUL;
+ es_13_6 = instance;
+ es_13_0 = (es_13_6 << 6) + es_5_0;
+ es_13_12 = (es_13_0 & 0x03000UL) >> 12;
+ es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
+ es_7_0 = es_13_0 & 0x0FFUL;
+ *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
+ *hi_val = (es_11_8 | ((es_13_12)<<(29)));
+}
+
+/* assign df performance counters for read */
+static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int *is_assigned)
+{
+
+ struct df_v3_6_event_mask *mask;
+ int i, target_cntr;
+
+ target_cntr = -1;
+
+ *is_assigned = 0;
+
+ df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+ if (target_cntr >= 0) {
+ *is_assigned = 1;
+ return 0;
+ }
+
+ mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+ for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
+ if (mask->config_assign_mask[i] == 0ULL) {
+ mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
+ return 0;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+/* release performance counter */
+static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+
+ struct df_v3_6_event_mask *mask;
+ int target_cntr;
+
+ target_cntr = -1;
+
+ df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+ mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+ if (target_cntr >= 0)
+ mask->config_assign_mask[target_cntr] = 0ULL;
+
+}
+
+/*
+ * get xgmi link counters via programmable data fabric (df) counters (max 4)
+ * using cake tx event.
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @count -> counters to pass
+ *
+ */
+
+static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
+ int instance,
+ uint64_t *count)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ uint64_t config;
+
+ config = GET_INSTANCE_CONFIG(instance);
+
+ df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+ hi_val = RREG32_PCIE(hi_base_addr);
+
+ *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+}
+
+/*
+ * reset xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ *
+ */
+static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
+ int instance)
+{
+ uint32_t lo_base_addr, hi_base_addr;
+ uint64_t config;
+
+ config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);
+
+ df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ WREG32_PCIE(lo_base_addr, 0UL);
+ WREG32_PCIE(hi_base_addr, 0UL);
+}
+
+/*
+ * add xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ *
+ */
+
+static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
+ int instance)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ uint64_t config;
+ int ret, is_assigned;
+
+ if (instance < 0 || instance > 1)
+ return -EINVAL;
+
+ config = GET_INSTANCE_CONFIG(instance);
+
+ ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
+
+ if (ret || is_assigned)
+ return ret;
+
+ df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val);
+
+ WREG32_PCIE(lo_base_addr, lo_val);
+ WREG32_PCIE(hi_base_addr, hi_val);
+
+ return ret;
+}
+
+
+/*
+ * start xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @is_enable -> either resume or assign event via df perfmon
+ *
+ */
+
+static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
+ int instance,
+ int is_enable)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val;
+ uint64_t config;
+ int ret;
+
+ if (instance < 0 || instance > 1)
+ return -EINVAL;
+
+ if (is_enable) {
+
+ ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
+
+ if (ret)
+ return ret;
+
+ } else {
+
+ config = GET_INSTANCE_CONFIG(instance);
+
+ df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ NULL,
+ NULL);
+
+ if (lo_base_addr == 0)
+ return -EINVAL;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+
+ WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
+
+ ret = 0;
+ }
+
+ return ret;
+
+}
+
+/*
+ * start xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @is_enable -> either pause or unassign event via df perfmon
+ *
+ */
+
+static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
+ int instance,
+ int is_disable)
+{
+
+ uint32_t lo_base_addr, hi_base_addr, lo_val;
+ uint64_t config;
+
+ config = GET_INSTANCE_CONFIG(instance);
+
+ if (is_disable) {
+ df_v3_6_reset_xgmi_link_cntr(adev, instance);
+ df_v3_6_pmc_release_cntr(adev, config);
+ } else {
+
+ df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ NULL,
+ NULL);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return -EINVAL;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+
+ WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
+ }
+
+ return 0;
+}
+
+static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ int is_enable)
+{
+ int xgmi_tx_link, ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+ : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+ if (xgmi_tx_link >= 0)
+ ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
+ is_enable);
+
+ if (ret)
+ return ret;
+
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ int is_disable)
+{
+ int xgmi_tx_link, ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+ : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+ if (xgmi_tx_link >= 0) {
+ ret = df_v3_6_stop_xgmi_link_cntr(adev,
+ xgmi_tx_link,
+ is_disable);
+ if (ret)
+ return ret;
+ }
+
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ uint64_t config,
+ uint64_t *count)
+{
+
+ int xgmi_tx_link;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+ : (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+ if (xgmi_tx_link >= 0) {
+ df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
+ df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+}
+
const struct amdgpu_df_funcs df_v3_6_funcs = {
.init = df_v3_6_init,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
@@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
.update_medium_grain_clock_gating =
df_v3_6_update_medium_grain_clock_gating,
.get_clockgating_state = df_v3_6_get_clockgating_state,
+ .pmc_start = df_v3_6_pmc_start,
+ .pmc_stop = df_v3_6_pmc_stop,
+ .pmc_get_count = df_v3_6_pmc_get_count
};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
index e79c58e5efcb..fcffd807764d 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
};
+/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
+#define AMDGPU_DF_MAX_COUNTERS 4
+
+/* get flags from df perfmon config */
+#define GET_EVENT(x) (x & 0xFFUL)
+#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
+#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
+#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \
+ | ((0x046ULL + x) << 8) \
+ | (0x02 << 16))
+
+/* df event conf macros */
+#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \
+ && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
+#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \
+ && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
+
extern const struct amdgpu_df_funcs df_v3_6_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a59e0fdf5a97..4cd1731d62fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4493,12 +4493,8 @@ static int gfx_v7_0_sw_init(void *handle)
static int gfx_v7_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+ int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -5070,30 +5066,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 02955e6e9dd9..25400b708722 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2057,12 +2057,8 @@ static int gfx_v8_0_sw_init(void *handle)
static int gfx_v8_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+ int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -7010,30 +7006,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
}
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ba67d1023264..c763733619fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -34,6 +34,7 @@
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"
+#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"
@@ -307,12 +308,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0,
- ARRAY_SIZE(golden_settings_gc_9_0));
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0_vg10,
- ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg10,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ }
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -1458,8 +1461,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
/* GDS reserve memory: 64 bytes alignment */
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
- adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
- adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
+ adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
@@ -1567,7 +1569,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
- (adev->gds.mem.total_size +
+ (adev->gds.gds_size +
adev->gfx.ngg.gds_reserve_size));
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
@@ -1781,10 +1783,6 @@ static int gfx_v9_0_sw_fini(void *handle)
kfree(ras_if);
}
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
-
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -1834,7 +1832,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
- WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
@@ -1902,8 +1900,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
- WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
- WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -1914,7 +1912,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v9_0_tiling_mode_table_init(adev);
@@ -1957,7 +1955,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
*/
gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
- WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
+ WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
(adev->gfx.config.sc_prim_fifo_size_backend <<
@@ -2024,11 +2022,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
/* csib */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
adev->gfx.rlc.clear_state_size);
}
@@ -2498,7 +2496,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].sched.ready = false;
}
- WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
}
@@ -2696,9 +2694,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
int i;
if (enable) {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].sched.ready = false;
@@ -2759,9 +2757,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -2979,67 +2977,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
mqd->cp_hqd_eop_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
mqd->cp_hqd_eop_base_addr_hi);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
mqd->cp_hqd_eop_control);
/* enable doorbell? */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
mqd->cp_hqd_dequeue_request);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
mqd->cp_hqd_pq_rptr);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
}
/* set the pointer to the MQD */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
mqd->cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
- WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
mqd->cp_mqd_control);
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
mqd->cp_hqd_pq_base_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
mqd->cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
mqd->cp_hqd_pq_control);
/* set the wb address whether it's enabled or not */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* enable the doorbell if requested */
@@ -3054,19 +3052,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
mqd->cp_hqd_pq_doorbell_control);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
/* set the vmid for the queue */
- WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
mqd->cp_hqd_persistent_state);
/* activate the queue */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
mqd->cp_hqd_active);
if (ring->use_doorbell)
@@ -3083,7 +3081,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
@@ -3095,21 +3093,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
DRM_DEBUG("KIQ dequeue request failed.\n");
/* Manual disable if dequeue request times out */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
0);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
return 0;
}
@@ -3529,6 +3527,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static const u32 vgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x000000f8, 0xbf110800,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0x7e060280,
+ 0x7e080280, 0x7e0a0280,
+ 0x7e0c0280, 0x7e0e0280,
+ 0x80808800, 0xbe803200,
+ 0xbf84fff5, 0xbf9c0000,
+ 0xd28c0001, 0x0001007f,
+ 0xd28d0001, 0x0002027e,
+ 0x10020288, 0xb8810904,
+ 0xb7814000, 0xd1196a01,
+ 0x00000301, 0xbe800087,
+ 0xbefc00c1, 0xd89c4000,
+ 0x00020201, 0xd89cc080,
+ 0x00040401, 0x320202ff,
+ 0x00000800, 0x80808100,
+ 0xbf84fff8, 0x7e020280,
+ 0xbf810000, 0x00000000,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x0000005f, 0xbee50080,
+ 0xbe812c65, 0xbe822c65,
+ 0xbe832c65, 0xbe842c65,
+ 0xbe852c65, 0xb77c0005,
+ 0x80808500, 0xbf84fff8,
+ 0xbe800080, 0xbf810000,
+};
+
+static const struct soc15_reg_entry vgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+};
+
+static const struct soc15_reg_entry sgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+};
+
+static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
+};
+
+static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ int r, i, j;
+ unsigned total_size, vgpr_offset, sgpr_offset;
+ u64 gpu_addr;
+
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ /* bail if the compute ring is not ready */
+ if (!ring->sched.ready)
+ return 0;
+
+ total_size =
+ ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ total_size +=
+ ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ total_size = ALIGN(total_size, 256);
+ vgpr_offset = total_size;
+ total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
+ sgpr_offset = total_size;
+ total_size += sizeof(sgpr_init_compute_shader);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
+ ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
+
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+ ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+ /* init the ib length to 0 */
+ ib.length_dw = 0;
+
+ /* VGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r) {
+ DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(f, false);
+ if (r) {
+ DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* read back registers to clear the counters */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (j = 0; j < 16; j++) {
+ gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+fail:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+
+ return r;
+}
+
static int gfx_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3570,8 +3803,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
- if (*ras_if)
+ /* requires IBs so do in late init after IB pool is initialized */
+ r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+ if (r)
+ return r;
+
+ /* handle resume path. */
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__GFX);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -3580,8 +3836,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__GFX);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -3614,7 +3876,7 @@ interrupt:
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
static int gfx_v9_0_late_init(void *handle)
@@ -4319,8 +4581,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
- WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -5056,13 +5318,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
- adev->gds.mem.total_size = 0x10000;
+ adev->gds.gds_size = 0x10000;
break;
case CHIP_RAVEN:
- adev->gds.mem.total_size = 0x1000;
+ adev->gds.gds_size = 0x1000;
break;
default:
- adev->gds.mem.total_size = 0x10000;
+ adev->gds.gds_size = 0x10000;
break;
}
@@ -5086,28 +5348,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
}
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 7bb5359d0bbd..0dc8926111e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
/* Program the AGP BAR */
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
@@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
* workaround that increase system aperture high address (add 1)
* to get rid of the VM fault and hardware hang.
*/
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max((adev->gmc.fb_end >> 18) + 0x1,
adev->gmc.agp_end >> 18));
else
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
@@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
- WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
@@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
- WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
- WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
@@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
0);
- WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8a3b5e6fc6c9..8bf2ba310fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -289,7 +289,7 @@ out:
*
* @adev: amdgpu_device pointer
*
- * Load the GDDR MC ucode into the hw (CIK).
+ * Load the GDDR MC ucode into the hw (VI).
* Returns 0 on success, error on failure.
*/
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
@@ -443,7 +443,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
*
* Set the location of vram, gart, and AGP in the GPU's
- * physical address space (CIK).
+ * physical address space (VI).
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
@@ -515,7 +515,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Look up the amount of vram, vram width, and decide how to place
- * vram and gart within the GPU's physical address space (CIK).
+ * vram and gart within the GPU's physical address space (VI).
* Returns 0 for success.
*/
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
@@ -630,7 +630,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
- * Flush the TLB for the requested page table (CIK).
+ * Flush the TLB for the requested page table (VI).
*/
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
@@ -800,7 +800,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
* This sets up the TLBs, programs the page tables for VMID0,
* sets up the hw for VMIDs 1-15 which are allocated on
* demand, and sets up the global locations for the LDS, GDS,
- * and GPUVM for FSA64 clients (CIK).
+ * and GPUVM for FSA64 clients (VI).
* Returns 0 for success, errors for failure.
*/
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
@@ -948,7 +948,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * This disables all VM page table (CIK).
+ * This disables all VM page table (VI).
*/
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
{
@@ -978,7 +978,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
*
- * Print human readable fault information (CIK).
+ * Print human readable fault information (VI).
*/
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
u32 addr, u32 mc_client, unsigned pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3b7370d914a5..602593bab7a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -687,8 +687,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
return 0;
}
/* handle resume path. */
- if (*ras_if)
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__UMC);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -697,8 +714,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__UMC);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -731,7 +754,7 @@ interrupt:
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
@@ -1100,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ if (amdgpu_virt_support_skip_setting(adev))
+ break;
+ /* fall through */
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
@@ -1164,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
+
/* After HDP is initialized, flush HDP.*/
adev->nbio_funcs->hdp_flush(adev, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 41a9a5779623..05d1d448c8f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
@@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
- /* Setup L2 cache */
- tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
- WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+ }
}
/**
@@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
+
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2471e7cf75ea..31030f86be86 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -26,6 +26,7 @@
#include "nbio/nbio_6_1_sh_mask.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
+#include "mp/mp_9_0_offset.h"
#include "soc15.h"
#include "vega10_ih.h"
#include "soc15_common.h"
@@ -343,7 +344,7 @@ flr_done:
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
- && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
+ && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
amdgpu_device_gpu_recover(adev, NULL);
}
@@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
+{
+ uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
+ uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+
+ adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
+
+ if (rlc_fw_ver >= 0x5d)
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
+
+ if (sos_fw_ver >= 0x80455)
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
+
+ if (sos_fw_ver >= 0x8045b)
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
@@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.trans_msg = xgpu_ai_mailbox_trans_msg,
.get_pp_clk = xgpu_ai_get_pp_clk,
.force_dpm_level = xgpu_ai_force_dpm_level,
+ .init_reg_access_mode = xgpu_ai_init_reg_access_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1cdb98ad2db3..73419fa38159 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -29,9 +29,18 @@
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
+static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
+ .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c69d51598cfe..bfaaa327ae3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -27,9 +27,18 @@
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "nbio/nbio_7_4_0_smn.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
+ .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 2f79765b4bdb..7f8edc66ddff 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
+ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
};
@@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
enum psp_gfx_fw_type fw_type; /* FW type */
};
+/* Command to setup register program */
+struct psp_gfx_cmd_reg_prog {
+ uint32_t reg_value;
+ uint32_t reg_id;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -226,6 +233,7 @@ union psp_gfx_commands
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
+ struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 143f0fae69d5..3f5827764df0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -50,6 +50,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+
static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -296,27 +300,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
psp_v3_1_reroute_ih(psp);
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ ret = psp_v3_1_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+ /* No size initialization for sriov */
+ /* Write the ring initialization command to C2PMSG_101 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_101), 0x80000000,
+ 0x8000FFFF, false);
+ } else {
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_64), 0x80000000,
+ 0x8000FFFF, false);
+ }
return ret;
}
@@ -327,16 +361,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
- /* Write the ring destroy command to C2PMSG_64 */
- psp_ring_reg = 3 << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ /* Write the Destroy GPCOM ring command to C2PMSG_101 */
+ psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+ /* there might be handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command to C2PMSG_64 */
+ psp_ring_reg = 3 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
return ret;
}
@@ -375,7 +424,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+ if (psp_v3_1_support_vmr_ring(psp))
+ psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
/* Update KM RB frame pointer to new frame */
/* write_frame ptr increments by size of rb_frame in bytes */
@@ -404,7 +456,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
+ /* send interrupt to PSP for SRIOV ring write pointer update */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
return 0;
}
@@ -574,6 +632,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
return 0;
}
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
+{
+ if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+ return true;
+
+ return false;
+}
+
static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -586,6 +652,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
.compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
+ .support_vmr_ring = psp_v3_1_support_vmr_ring,
};
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9c88ce513d78..7a259c5b6c62 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -210,12 +210,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- soc15_program_register_sequence(adev,
- golden_settings_sdma_4,
- ARRAY_SIZE(golden_settings_sdma_4));
- soc15_program_register_sequence(adev,
- golden_settings_sdma_vg10,
- ARRAY_SIZE(golden_settings_sdma_vg10));
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg10,
+ ARRAY_SIZE(golden_settings_sdma_vg10));
+ }
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -1521,8 +1523,25 @@ static int sdma_v4_0_late_init(void *handle)
}
/* handle resume path. */
- if (*ras_if)
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__SDMA);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -1531,8 +1550,14 @@ static int sdma_v4_0_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__SDMA);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -1571,7 +1596,7 @@ interrupt:
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
static int sdma_v4_0_sw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9d8df68893b9..4ff930a47e10 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1375,6 +1375,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
+static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@@ -1393,6 +1405,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.need_full_reset = &si_need_full_reset,
.get_pcie_usage = &si_get_pcie_usage,
.need_reset_on_init = &si_need_reset_on_init,
+ .get_pcie_replay_count = &si_get_pcie_replay_count,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index b7e594c2bfb4..d9fdd95fd6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -44,6 +44,7 @@
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
#include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "mp/mp_9_0_offset.h"
@@ -64,6 +65,9 @@
#include "dce_virtual.h"
#include "mxgpu_ai.h"
#include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
@@ -230,7 +234,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
@@ -385,7 +389,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
tmp &= ~(entry->and_mask);
tmp |= entry->or_mask;
}
- WREG32(reg, tmp);
+
+ if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
+ WREG32_RLC(reg, tmp);
+ else
+ WREG32(reg, tmp);
+
}
}
@@ -475,6 +487,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
soc15_asic_get_baco_capability(adev, &baco_reset);
else
baco_reset = false;
+ if (baco_reset) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (hive || (ras && ras->supported))
+ baco_reset = false;
+ }
break;
default:
baco_reset = false;
@@ -606,12 +625,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+
+ /* For Vega10 SR-IOV, PSP need to be initialized before IH */
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ }
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ }
}
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
@@ -733,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
- return false;
+ if (!amdgpu_passthrough(adev))
+ return false;
if (adev->flags & AMD_IS_APU)
return false;
@@ -748,6 +780,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@@ -765,6 +809,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.init_doorbell_index = &vega10_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -784,12 +829,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.init_doorbell_index = &vega20_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static int soc15_common_early_init(void *handle)
{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &soc15_pcie_rreg;
@@ -998,11 +1047,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
int i;
struct amdgpu_ring *ring;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- adev->nbio_funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ /* Two reasons to skip
+ * 1, Host driver already programmed them
+ * 2, To avoid registers program violations in SR-IOV
+ */
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ adev->nbio_funcs->sdma_doorbell_range(adev, i,
+ ring->use_doorbell, ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+ }
}
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
@@ -1019,6 +1074,12 @@ static int soc15_common_hw_init(void *handle)
soc15_program_aspm(adev);
/* setup nbio registers */
adev->nbio_funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio_funcs->remap_hdp_registers)
+ adev->nbio_funcs->remap_hdp_registers(adev);
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
/* HW doorbell routing policy: doorbell writing not
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a66c8bfbbaa6..06f39f5bbf76 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -42,8 +42,18 @@ struct soc15_reg_golden {
u32 or_mask;
};
+struct soc15_reg_entry {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+};
+
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 49c262540940..47f74dab365d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,26 +69,60 @@
} \
} while (0)
-#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
- ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
- UVD_DPG_LMA_CTL__MASK_EN_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
- << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
- (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
- RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
+#define WREG32_RLC(reg, value) \
+ do { \
+ if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ uint32_t i = 0; \
+ uint32_t retries = 50000; \
+ uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
+ uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
+ uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
+ WREG32(r0, value); \
+ WREG32(r1, (reg | 0x80000000)); \
+ WREG32(spare_int, 0x1); \
+ for (i = 0; i < retries; i++) { \
+ u32 tmp = RREG32(r1); \
+ if (!(tmp & 0x80000000)) \
+ break; \
+ udelay(10); \
+ } \
+ if (i >= retries) \
+ pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
+ } else { \
+ WREG32(reg, value); \
+ } \
+ } while (0)
-#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
- UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
- << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
- (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
+ if (target_reg == grbm_cntl) \
+ WREG32(r2, value); \
+ else if (target_reg == grbm_idx) \
+ WREG32(r3, value); \
+ WREG32(target_reg, value); \
+ } else { \
+ WREG32(target_reg, value); \
+ } \
} while (0)
-#endif
+#define WREG32_SOC15_RLC(ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
+ WREG32_RLC(target_reg, value); \
+ } while (0)
+
+#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
+ WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+ (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
+ & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
+ WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c4fb58667fd4..bf3385280d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v4_2_ring_get_rptr,
.get_wptr = uvd_v4_2_ring_get_wptr,
.set_wptr = uvd_v4_2_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 52bd8a654734..3210a7bd9a6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -849,6 +849,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v5_0_ring_get_rptr,
.get_wptr = uvd_v5_0_ring_get_wptr,
.set_wptr = uvd_v5_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9edddf9f88a..c61a314c56cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1502,6 +1502,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1527,6 +1528,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1555,6 +1557,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 2191d3d0a219..cdb96d4cb424 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1759,6 +1759,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
@@ -1791,6 +1792,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 40363ca6c5f1..ab0cb8325796 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v2_0_ring_get_rptr,
.get_wptr = vce_v2_0_ring_get_wptr,
.set_wptr = vce_v2_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6ec65cf11112..36902ec16dcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
@@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index c0ec27991c22..e267b073f525 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 3dbc51f9d3b9..bb47f5b24be5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state);
/**
* vcn_v1_0_early_init - set function pointers
@@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
- return r;
+ adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+
+ return 0;
}
/**
@@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
return r;
}
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state)
+{
+ int ret_code;
+ uint32_t reg_data = 0;
+ uint32_t reg_data2 = 0;
+ struct amdgpu_ring *ring;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG non-jpeg */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg non-jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* Make sure JPRG Snoop is disabled before sending the pause */
+ reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+ /* pause DPG jpeg */
+ reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_jpeg;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.jpeg = new_state->jpeg;
+ }
+
+ return 0;
+}
+
static bool vcn_v1_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
@@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
@@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
.align_mask = 0xf,
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.extra_dw = 64,
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 8d89ab7f0ae8..5f54acc70fec 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -48,14 +48,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
adev->irq.ih.enabled = true;
if (adev->irq.ih1.ring_size) {
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
adev->irq.ih1.enabled = true;
}
@@ -63,7 +78,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
adev->irq.ih2.enabled = true;
}
}
@@ -81,7 +104,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@@ -92,7 +123,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -104,7 +143,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
@@ -187,7 +235,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -214,7 +270,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WPTR_OVERFLOW_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
RB_FULL_DRAIN_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -232,7 +296,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5e5b42a0744a..b8adf3808de2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -987,6 +987,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
+static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
{
u32 clock_cntl, pc;
@@ -1021,6 +1033,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &vi_get_pcie_usage,
.need_reset_on_init = &vi_need_reset_on_init,
+ .get_pcie_replay_count = &vi_get_pcie_replay_count,
};
#define CZ_REV_BRISTOL(rev) \
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 3621efbd5759..e413d4a71fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -21,7 +21,7 @@
*/
static const uint32_t cwsr_trap_gfx8_hex[] = {
- 0xbf820001, 0xbf82012b,
+ 0xbf820001, 0xbf820121,
0xb8f4f802, 0x89748674,
0xb8f5f803, 0x8675ff75,
0x00000400, 0xbf850017,
@@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
0x8671ff71, 0x0000ffff,
0x8f728374, 0xb972e0c2,
0xbf800002, 0xb9740002,
- 0xbe801f70, 0xb8f5f803,
- 0x8675ff75, 0x00000100,
- 0xbf840006, 0xbefa0080,
- 0xb97a0203, 0x8671ff71,
- 0x0000ffff, 0x80f08870,
- 0x82f18071, 0xbefa0080,
+ 0xbe801f70, 0xbefa0080,
0xb97a0283, 0xbef60068,
0xbef70069, 0xb8fa1c07,
0x8e7a9c7a, 0x87717a71,
@@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbf820001, 0xbf82015d,
+ 0xbf820001, 0xbf82015e,
0xb8f8f802, 0x89788678,
- 0xb8f1f803, 0x866eff71,
- 0x00000400, 0xbf850037,
- 0x866eff71, 0x00000800,
- 0xbf850003, 0x866eff71,
- 0x00000100, 0xbf840008,
+ 0xb8fbf803, 0x866eff7b,
+ 0x00000400, 0xbf85003b,
+ 0x866eff7b, 0x00000800,
+ 0xbf850003, 0x866eff7b,
+ 0x00000100, 0xbf84000c,
0x866eff78, 0x00002000,
- 0xbf840001, 0xbf810000,
+ 0xbf840005, 0xbf8e0010,
+ 0xb8eef803, 0x866eff6e,
+ 0x00000400, 0xbf84fffb,
0x8778ff78, 0x00002000,
0x80ec886c, 0x82ed806d,
0xb8eef807, 0x866fff6e,
@@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8977ff77, 0xfc000000,
0x87776f77, 0x896eff6e,
0x001f8000, 0xb96ef807,
- 0xb8f0f812, 0xb8f1f813,
- 0x8ef08870, 0xc0071bb8,
+ 0xb8faf812, 0xb8fbf813,
+ 0x8efa887a, 0xc0071bbd,
0x00000000, 0xbf8cc07f,
- 0xc0071c38, 0x00000008,
+ 0xc0071ebd, 0x00000008,
0xbf8cc07f, 0x86ee6e6e,
0xbf840001, 0xbe801d6e,
- 0xb8f1f803, 0x8671ff71,
+ 0xb8fbf803, 0x867bff7b,
0x000001ff, 0xbf850002,
0x806c846c, 0x826d806d,
0x866dff6d, 0x0000ffff,
@@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0x8f6e8378, 0xb96ee0c2,
0xbf800002, 0xb9780002,
0xbe801f6c, 0x866dff6d,
- 0x0000ffff, 0xbef00080,
- 0xb9700283, 0xb8f02407,
- 0x8e709c70, 0x876d706d,
- 0xb8f003c7, 0x8e709b70,
- 0x876d706d, 0xb8f0f807,
- 0x8670ff70, 0x00007fff,
- 0xb970f807, 0xbeee007e,
+ 0x0000ffff, 0xbefa0080,
+ 0xb97a0283, 0xb8fa2407,
+ 0x8e7a9b7a, 0x876d7a6d,
+ 0xb8fa03c7, 0x8e7a9a7a,
+ 0x876d7a6d, 0xb8faf807,
+ 0x867aff7a, 0x00007fff,
+ 0xb97af807, 0xbeee007e,
0xbeef007f, 0xbefe0180,
- 0xbf900004, 0x87708478,
- 0xb970f802, 0xbf8e0002,
- 0xbf88fffe, 0xb8f02a05,
+ 0xbf900004, 0x877a8478,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b867b, 0x807a7b7a,
+ 0x807a7e7a, 0x827b807f,
+ 0x867bff7b, 0x0000ffff,
+ 0xc04b1c3d, 0x00000050,
+ 0xbf8cc07f, 0xc04b1d3d,
+ 0x00000060, 0xbf8cc07f,
+ 0xc0431e7d, 0x00000074,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x867aff7f,
+ 0x08000000, 0x8f7a837a,
+ 0x87777a77, 0x867aff7f,
+ 0x70000000, 0x8f7a817a,
+ 0x87777a77, 0xbef1007c,
+ 0xbef00080, 0xb8f02a05,
0x80708170, 0x8e708a70,
- 0xb8f11605, 0x80718171,
- 0x8e718671, 0x80707170,
- 0x80707e70, 0x8271807f,
- 0x8671ff71, 0x0000ffff,
- 0xc0471cb8, 0x00000040,
- 0xbf8cc07f, 0xc04b1d38,
- 0x00000048, 0xbf8cc07f,
- 0xc0431e78, 0x00000058,
- 0xbf8cc07f, 0xc0471eb8,
- 0x0000005c, 0xbf8cc07f,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
- 0xbef700ff, 0x00807fac,
- 0x8670ff7f, 0x08000000,
- 0x8f708370, 0x87777077,
- 0x8670ff7f, 0x70000000,
- 0x8f708170, 0x87777077,
- 0xbefb007c, 0xbefa0080,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0xb8f01605,
- 0x80708170, 0x8e708670,
- 0x807a707a, 0xbef60084,
- 0xbef600ff, 0x01000000,
- 0xbefe007c, 0xbefc007a,
- 0xc0611efa, 0x0000007c,
- 0xbf8cc07f, 0x807a847a,
- 0xbefc007e, 0xbefe007c,
- 0xbefc007a, 0xc0611b3a,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x80707a70,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc0070, 0xc0611c7a,
0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0xbefe007c, 0xbefc007a,
- 0xc0611b7a, 0x0000007c,
- 0xbf8cc07f, 0x807a847a,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc007a, 0xc0611bba,
+ 0xbefc0070, 0xc0611b7a,
0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0xbefe007c, 0xbefc007a,
- 0xc0611bfa, 0x0000007c,
- 0xbf8cc07f, 0x807a847a,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc007a, 0xc0611e3a,
+ 0xbefc0070, 0xc0611bfa,
0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0xb8f1f803, 0xbefe007c,
- 0xbefc007a, 0xc0611c7a,
- 0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0xbefe007c, 0xbefc007a,
- 0xc0611a3a, 0x0000007c,
- 0xbf8cc07f, 0x807a847a,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8fbf803,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
0xbefc007e, 0xbefe007c,
- 0xbefc007a, 0xc0611a7a,
- 0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0xb8fbf801, 0xbefe007c,
- 0xbefc007a, 0xc0611efa,
+ 0xbefc0070, 0xc0611a3a,
0x0000007c, 0xbf8cc07f,
- 0x807a847a, 0xbefc007e,
- 0x8670ff7f, 0x04000000,
- 0xbeef0080, 0x876f6f70,
- 0xb8fa2a05, 0x807a817a,
- 0x8e7a8a7a, 0xb8f11605,
- 0x80718171, 0x8e718471,
- 0x8e768271, 0xbef600ff,
- 0x01000000, 0xbef20174,
- 0x80747a74, 0x82758075,
- 0xbefc0080, 0xbf800000,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003a, 0x00000000,
- 0xbf8cc07f, 0xc06b013a,
- 0x00000010, 0xbf8cc07f,
- 0xc06b023a, 0x00000020,
- 0xbf8cc07f, 0xc06b033a,
- 0x00000030, 0xbf8cc07f,
- 0x8074c074, 0x82758075,
- 0x807c907c, 0xbf0a717c,
- 0xbf85ffe7, 0xbef40172,
- 0xbefa0080, 0xbefe00c1,
- 0xbeff00c1, 0xbee80080,
- 0xbee90080, 0xbef600ff,
- 0x01000000, 0xe0724000,
- 0x7a1d0000, 0xe0724100,
- 0x7a1d0100, 0xe0724200,
- 0x7a1d0200, 0xe0724300,
- 0x7a1d0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8f14306,
- 0x8671c171, 0xbf84002c,
- 0xbf8a0000, 0x8670ff6f,
- 0x04000000, 0xbf840028,
- 0x8e718671, 0x8e718271,
- 0xbef60071, 0xb8fa2a05,
- 0x807a817a, 0x8e7a8a7a,
- 0xb8f01605, 0x80708170,
- 0x8e708670, 0x807a707a,
- 0x807aff7a, 0x00000080,
+ 0x80708470, 0xbefc007e,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0xb8f1f801,
+ 0xbefe007c, 0xbefc0070,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x80708470,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f7a, 0xb8f02a05,
+ 0x80708170, 0x8e708a70,
+ 0xb8fb1605, 0x807b817b,
+ 0x8e7b847b, 0x8e76827b,
0xbef600ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe800077,
- 0x8677ff77, 0xff7fffff,
- 0x8777ff77, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8cc07f, 0xe0765000,
- 0x7a1d0002, 0x68040702,
- 0xd0c9006a, 0x0000e302,
- 0xbf87fff7, 0xbef70000,
- 0xbefa00ff, 0x00000400,
+ 0xbef20174, 0x80747074,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a7b7c, 0xbf85ffe7,
+ 0xbef40172, 0xbef00080,
0xbefe00c1, 0xbeff00c1,
- 0xb8f12a05, 0x80718171,
- 0x8e718271, 0x8e768871,
+ 0xbee80080, 0xbee90080,
0xbef600ff, 0x01000000,
- 0xbefc0084, 0xbf0a717c,
- 0xbf840015, 0xbf11017c,
- 0x8071ff71, 0x00001000,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0xe0724000, 0x7a1d0000,
- 0xe0724100, 0x7a1d0100,
- 0xe0724200, 0x7a1d0200,
- 0xe0724300, 0x7a1d0300,
- 0x807c847c, 0x807aff7a,
- 0x00000400, 0xbf0a717c,
- 0xbf85ffef, 0xbf9c0000,
- 0xbf8200dc, 0xbef4007e,
- 0x8675ff7f, 0x0000ffff,
- 0x8775ff75, 0x00040000,
- 0xbef60080, 0xbef700ff,
- 0x00807fac, 0x866eff7f,
- 0x08000000, 0x8f6e836e,
- 0x87776e77, 0x866eff7f,
- 0x70000000, 0x8f6e816e,
- 0x87776e77, 0x866eff7f,
- 0x04000000, 0xbf84001e,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
0xbefe00c1, 0xbeff00c1,
- 0xb8ef4306, 0x866fc16f,
- 0xbf840019, 0x8e6f866f,
- 0x8e6f826f, 0xbef6006f,
- 0xb8f82a05, 0x80788178,
- 0x8e788a78, 0xb8ee1605,
- 0x806e816e, 0x8e6e866e,
- 0x80786e78, 0x8078ff78,
+ 0xb8fb4306, 0x867bc17b,
+ 0xbf84002c, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+ 0xbf840028, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fa1605,
+ 0x807a817a, 0x8e7a867a,
+ 0x80707a70, 0x8070ff70,
0x00000080, 0xbef600ff,
0x01000000, 0xbefc0080,
- 0xe0510000, 0x781d0000,
- 0xe0510100, 0x781d0000,
- 0x807cff7c, 0x00000200,
- 0x8078ff78, 0x00000200,
- 0xbf0a6f7c, 0xbf85fff6,
- 0xbef80080, 0xbefe00c1,
- 0xbeff00c1, 0xb8ef2a05,
- 0x806f816f, 0x8e6f826f,
- 0x8e76886f, 0xbef600ff,
- 0x01000000, 0xbeee0078,
- 0x8078ff78, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0x806fff6f, 0x00008000,
- 0xe0524000, 0x781d0000,
- 0xe0524100, 0x781d0100,
- 0xe0524200, 0x781d0200,
- 0xe0524300, 0x781d0300,
- 0xbf8c0f70, 0x7e000300,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x701d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000f702, 0xbf87fff7,
+ 0xbef70000, 0xbef000ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8fb2a05,
+ 0x807b817b, 0x8e7b827b,
+ 0x8e76887b, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a7b7c, 0xbf840015,
+ 0xbf11017c, 0x807bff7b,
+ 0x00001000, 0x7e000300,
0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8078ff78, 0x00000400,
- 0xbf0a6f7c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x6e1d0000, 0xe0524100,
- 0x6e1d0100, 0xe0524200,
- 0x6e1d0200, 0xe0524300,
- 0x6e1d0300, 0xb8f82a05,
+ 0x7e060303, 0xe0724000,
+ 0x701d0000, 0xe0724100,
+ 0x701d0100, 0xe0724200,
+ 0x701d0200, 0xe0724300,
+ 0x701d0300, 0x807c847c,
+ 0x8070ff70, 0x00000400,
+ 0xbf0a7b7c, 0xbf85ffef,
+ 0xbf9c0000, 0xbf8200da,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x08000000,
+ 0x8f6e836e, 0x87776e77,
+ 0x866eff7f, 0x70000000,
+ 0x8f6e816e, 0x87776e77,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001e, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf840019,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82a05,
0x80788178, 0x8e788a78,
0xb8ee1605, 0x806e816e,
0x8e6e866e, 0x80786e78,
- 0x80f8c078, 0xb8ef1605,
- 0x806f816f, 0x8e6f846f,
- 0x8e76826f, 0xbef600ff,
- 0x01000000, 0xbefc006f,
- 0xc031003a, 0x00000078,
- 0x80f8c078, 0xbf8cc07f,
- 0x80fc907c, 0xbf800000,
- 0xbe802d00, 0xbe822d02,
- 0xbe842d04, 0xbe862d06,
- 0xbe882d08, 0xbe8a2d0a,
- 0xbe8c2d0c, 0xbe8e2d0e,
- 0xbf06807c, 0xbf84fff0,
+ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbef80080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x8e76886f,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0x806fff6f,
+ 0x00008000, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
0xb8f82a05, 0x80788178,
0x8e788a78, 0xb8ee1605,
0x806e816e, 0x8e6e866e,
- 0x80786e78, 0xbef60084,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
0xbef600ff, 0x01000000,
- 0xc0211bfa, 0x00000078,
- 0x80788478, 0xc0211b3a,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
0x00000078, 0x80788478,
- 0xc0211b7a, 0x00000078,
- 0x80788478, 0xc0211eba,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
0x00000078, 0x80788478,
- 0xc0211efa, 0x00000078,
- 0x80788478, 0xc0211c3a,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
0x00000078, 0x80788478,
- 0xc0211c7a, 0x00000078,
- 0x80788478, 0xc0211a3a,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
0x00000078, 0x80788478,
- 0xc0211a7a, 0x00000078,
- 0x80788478, 0xc0211cfa,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
0x00000078, 0x80788478,
- 0xbf8cc07f, 0xbefc006f,
- 0xbefe007a, 0xbeff007b,
- 0x866f71ff, 0x000003ff,
- 0xb96f4803, 0x866f71ff,
- 0xfffff800, 0x8f6f8b6f,
- 0xb96fa2c3, 0xb973f801,
- 0xb8ee2a05, 0x806e816e,
- 0x8e6e8a6e, 0xb8ef1605,
- 0x806f816f, 0x8e6f866f,
- 0x806e6f6e, 0x806e746e,
- 0x826f8075, 0x866fff6f,
- 0x0000ffff, 0xc0071cb7,
- 0x00000040, 0xc00b1d37,
- 0x00000048, 0xc0031e77,
- 0x00000058, 0xc0071eb7,
- 0x0000005c, 0xbf8cc07f,
- 0x866fff6d, 0xf0000000,
- 0x8f6f9c6f, 0x8e6f906f,
- 0xbeee0080, 0x876e6f6e,
- 0x866fff6d, 0x08000000,
- 0x8f6f9b6f, 0x8e6f8f6f,
- 0x876e6f6e, 0x866fff70,
- 0x00800000, 0x8f6f976f,
- 0xb96ef807, 0x866dff6d,
- 0x0000ffff, 0x86fe7e7e,
- 0x86ea6a6a, 0x8f6e8370,
- 0xb96ee0c2, 0xbf800002,
- 0xb9700002, 0xbf8a0000,
- 0x95806f6c, 0xbf810000,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe0070,
+ 0xbeff0071, 0x866f7bff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f7bff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc00b1c37, 0x00000050,
+ 0xc00b1d37, 0x00000060,
+ 0xc0031e77, 0x00000074,
+ 0xbf8cc07f, 0x866fff6d,
+ 0xf8000000, 0x8f6f9b6f,
+ 0x8e6f906f, 0xbeee0080,
+ 0x876e6f6e, 0x866fff6d,
+ 0x04000000, 0x8f6f9a6f,
+ 0x8e6f8f6f, 0x876e6f6e,
+ 0x866fff7a, 0x00800000,
+ 0x8f6f976f, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0x8f6e837a, 0xb96ee0c2,
+ 0xbf800002, 0xb97a0002,
+ 0xbf8a0000, 0x95806f6c,
+ 0xbf810000, 0x00000000,
};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index abe1a5da29fb..a47f5b933120 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
end
- //check whether there is mem_viol
- s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
- s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_NO_PC_REWIND
-
- //if so, need rewind PC assuming GDS operation gets NACKed
- s_mov_b32 s_save_tmp, 0 //clear mem_viol bit
- s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit
- s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
- s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8
- s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc
-
-L_NO_PC_REWIND:
s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 0bb9c577b3a2..6bae2e022c6e 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
-var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
-var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME
var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
@@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
-var s_save_tmp = ttmp4
-var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
+var s_save_tmp = ttmp14
+var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine
var s_save_xnack_mask_lo = ttmp6
var s_save_xnack_mask_hi = ttmp7
var s_save_buf_rsrc0 = ttmp8
@@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9
var s_save_buf_rsrc2 = ttmp10
var s_save_buf_rsrc3 = ttmp11
var s_save_status = ttmp12
-var s_save_mem_offset = ttmp14
+var s_save_mem_offset = ttmp4
var s_save_alloc_size = s_save_trapsts //conflict
-var s_save_m0 = ttmp15
+var s_save_m0 = ttmp5
var s_save_ttmps_lo = s_save_tmp //no conflict
var s_save_ttmps_hi = s_save_trapsts //no conflict
@@ -207,10 +207,10 @@ var s_restore_mode = ttmp7
var s_restore_pc_lo = ttmp0
var s_restore_pc_hi = ttmp1
-var s_restore_exec_lo = ttmp14
-var s_restore_exec_hi = ttmp15
-var s_restore_status = ttmp4
-var s_restore_trapsts = ttmp5
+var s_restore_exec_lo = ttmp4
+var s_restore_exec_hi = ttmp5
+var s_restore_status = ttmp14
+var s_restore_trapsts = ttmp15
var s_restore_xnack_mask_lo = xnack_mask_lo
var s_restore_xnack_mask_hi = xnack_mask_hi
var s_restore_buf_rsrc0 = ttmp8
@@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
L_HALT_WAVE:
// If STATUS.HALT is set then this fault must come from SQC instruction fetch.
- // We cannot prevent further faults so just terminate the wavefront.
+ // We cannot prevent further faults. Spin wait until context saved.
s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
s_cbranch_scc0 L_NOT_ALREADY_HALTED
- s_endpgm
+
+L_WAIT_CTX_SAVE:
+ s_sleep 0x10
+ s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ s_cbranch_scc0 L_WAIT_CTX_SAVE
+
L_NOT_ALREADY_HALTED:
s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
@@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
// Read second-level TBA/TMA from first-level TMA and jump if available.
// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
// ttmp12 holds SQ_WAVE_STATUS
- s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
- s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
- s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
- s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+ s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+ s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
s_waitcnt lgkmcnt(0)
- s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
s_waitcnt lgkmcnt(0)
s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -405,7 +411,7 @@ end
else
end
- // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_save_ttmps_lo)
get_sgpr_size_bytes(s_save_ttmps_hi)
@@ -413,13 +419,11 @@ end
s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
- s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
- ack_sqc_store_workaround()
- s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+ s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
ack_sqc_store_workaround()
- s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
ack_sqc_store_workaround()
- s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
ack_sqc_store_workaround()
/* setup Resource Contants */
@@ -1093,7 +1097,7 @@ end
//s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
- // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
// ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
get_vgpr_size_bytes(s_restore_ttmps_lo)
get_sgpr_size_bytes(s_restore_ttmps_hi)
@@ -1101,10 +1105,9 @@ end
s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
- s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
- s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
- s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
s_waitcnt lgkmcnt(0)
//reuse s_restore_m0 as a temp register
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..ea82828fdc76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
q_properties->type = KFD_QUEUE_TYPE_SDMA;
+ else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
+ q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
else
return -ENOTSUPP;
@@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
struct kfd_process_device *pdd;
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
+ } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ if (args->size != PAGE_SIZE)
+ return -EINVAL;
+ offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+ if (!offset)
+ return -ENOMEM;
}
mutex_lock(&p->mutex);
@@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
args->mmap_offset = offset;
+ /* MMIO is mapped through kfd device
+ * Generate a kfd mmap offset
+ */
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
+ args->mmap_offset <<= PAGE_SHIFT;
+ }
+
return 0;
err_free:
@@ -1551,6 +1567,32 @@ copy_from_user_failed:
return err;
}
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ int retval;
+ struct kfd_ioctl_alloc_queue_gws_args *args = data;
+ struct kfd_dev *dev;
+
+ if (!hws_gws_support)
+ return -EINVAL;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+ return -EINVAL;
+ }
+ if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+ retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+ mutex_unlock(&p->mutex);
+
+ args->first_gws = 0;
+ return retval;
+}
+
static int kfd_ioctl_get_dmabuf_info(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
kfd_ioctl_import_dmabuf, 0),
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+ kfd_ioctl_alloc_queue_gws, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
@@ -1845,6 +1889,39 @@ err_i1:
return retcode;
}
+static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma)
+{
+ phys_addr_t address;
+ int ret;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ pr_debug("Process %d mapping mmio page\n"
+ " target user address == 0x%08llX\n"
+ " physical address == 0x%08llX\n"
+ " vm_flags == 0x%04lX\n"
+ " size == 0x%04lX\n",
+ process->pasid, (unsigned long long) vma->vm_start,
+ address, vma->vm_flags, PAGE_SIZE);
+
+ ret = io_remap_pfn_range(vma,
+ vma->vm_start,
+ address >> PAGE_SHIFT,
+ PAGE_SIZE,
+ vma->vm_page_prot);
+ return ret;
+}
+
+
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
@@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (!dev)
return -ENODEV;
return kfd_reserved_mem_mmap(dev, process, vma);
+ case KFD_MMAP_TYPE_MMIO:
+ if (!dev)
+ return -ENODEV;
+ return kfd_mmio_mmap(dev, process, vma);
}
return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2e7c44955f43..59f8ca4297db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info carrizo_cache_info
#define raven_cache_info carrizo_cache_info
@@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
props->weight = 20;
else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
- props->weight = 15;
+ props->weight = 15 * iolink->num_hops_xgmi;
else
props->weight = node_distance(id_from, id_to);
@@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris12_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
break;
+ case CHIP_VEGAM:
+ pcache_info = vegam_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+ break;
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
@@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
struct kfd_dev *kdev,
+ struct kfd_dev *peer_kdev,
struct crat_subtype_iolink *sub_type_hdr,
uint32_t proximity_domain_from,
uint32_t proximity_domain_to)
@@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
sub_type_hdr->proximity_domain_from = proximity_domain_from;
sub_type_hdr->proximity_domain_to = proximity_domain_to;
+ sub_type_hdr->num_hops_xgmi =
+ amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
return 0;
}
@@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
(char *)sub_type_hdr +
sizeof(struct crat_subtype_iolink));
ret = kfd_fill_gpu_xgmi_link_to_gpu(
- &avail_size, kdev,
+ &avail_size, kdev, peer_dev->gpu,
(struct crat_subtype_iolink *)sub_type_hdr,
proximity_domain, nid);
if (ret < 0)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 7c3f192fe25f..d54ceebd346b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -274,7 +274,8 @@ struct crat_subtype_iolink {
uint32_t minimum_bandwidth_mbs;
uint32_t maximum_bandwidth_mbs;
uint32_t recommended_transfer_size;
- uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH];
+ uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
+ uint8_t num_hops_xgmi;
};
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 765b58a17dc7..9d1b026e29e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
.needs_iommu_device = true,
.needs_pci_atomics = true,
.num_sdma_engines = 1,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
#endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = true,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
+ .num_sdma_queues_per_engine = 2,
+};
+
+static const struct kfd_device_info vegam_device_info = {
+ .asic_family = CHIP_VEGAM,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 4,
+ .ih_ring_entry_size = 4 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = true,
+ .num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
@@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
.needs_iommu_device = false,
.needs_pci_atomics = false,
.num_sdma_engines = 2,
+ .num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 8,
};
@@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x6995, &polaris12_device_info }, /* Polaris12 */
{ 0x6997, &polaris12_device_info }, /* Polaris12 */
{ 0x699F, &polaris12_device_info }, /* Polaris12 */
+ { 0x694C, &vegam_device_info }, /* VegaM */
+ { 0x694E, &vegam_device_info }, /* VegaM */
+ { 0x694F, &vegam_device_info }, /* VegaM */
{ 0x6860, &vega10_device_info }, /* Vega10 */
{ 0x6861, &vega10_device_info }, /* Vega10 */
{ 0x6862, &vega10_device_info }, /* Vega10 */
@@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
+ /* Allocate global GWS that is shared by all KFD processes */
+ if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
+ amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
+ dev_err(kfd_device, "Could not allocate %d gws\n",
+ amdgpu_amdkfd_get_num_gws(kfd->kgd));
+ goto out;
+ }
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
@@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
false)) {
dev_err(kfd_device, "Could not allocate %d bytes\n", size);
- goto out;
+ goto alloc_gtt_mem_failure;
}
dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -611,6 +653,9 @@ kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+alloc_gtt_mem_failure:
+ if (hws_gws_support)
+ amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
dev_err(kfd_device,
"device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+ if (hws_gws_support)
+ amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
}
kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ae381450601c..ece35c7a77b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
- unsigned int sdma_queue_id);
+ struct queue *q);
static void kfd_process_hw_exception(struct work_struct *work);
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
- if (type == KFD_QUEUE_TYPE_SDMA)
+ if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
return KFD_MQD_TYPE_SDMA;
return KFD_MQD_TYPE_CP;
}
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
return dqm->dev->device_info->num_sdma_engines;
}
+static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
+{
+ return dqm->dev->device_info->num_xgmi_sdma_engines;
+}
+
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
return dqm->dev->device_info->num_sdma_engines
* dqm->dev->device_info->num_sdma_queues_per_engine;
}
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
+{
+ return dqm->dev->device_info->num_xgmi_sdma_engines
+ * dqm->dev->device_info->num_sdma_queues_per_engine;
+}
+
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
* preserve the user mode ABI.
*/
q->doorbell_id = q->properties.queue_id;
- } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
* doorbell assignments based on the engine and queue id.
* The doobell index distance between RLC (2*i) and (2*i+1)
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
struct kfd_dev *dev = qpd->dqm->dev;
if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
return;
old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
- else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
else
retval = -EINVAL;
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
@@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
- if (!mqd_mgr)
- return -ENOMEM;
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
retval = allocate_hqd(dqm, q);
if (retval)
@@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
int retval;
struct mqd_manager *mqd_mgr;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr)
- return -ENOMEM;
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
deallocate_hqd(dqm, q);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q->sdma_id);
+ deallocate_sdma_queue(dqm, q);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ dqm->xgmi_sdma_queue_count--;
+ deallocate_sdma_queue(dqm, q);
} else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
@@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = -ENODEV;
goto out_unlock;
}
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr) {
- retval = -ENOMEM;
- goto out_unlock;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
}
} else if (prev_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = map_queues_cpsch(dqm);
else if (q->properties.is_active &&
(q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
- q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+ q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
if (WARN(q->process->mm != current->mm,
"should only run in user thread"))
retval = -EFAULT;
@@ -571,27 +584,6 @@ out_unlock:
return retval;
}
-static struct mqd_manager *get_mqd_manager(
- struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
-{
- struct mqd_manager *mqd_mgr;
-
- if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
- return NULL;
-
- pr_debug("mqd type %d\n", type);
-
- mqd_mgr = dqm->mqd_mgrs[type];
- if (!mqd_mgr) {
- mqd_mgr = mqd_manager_init(type, dqm->dev);
- if (!mqd_mgr)
- pr_err("mqd manager is NULL");
- dqm->mqd_mgrs[type] = mqd_mgr;
- }
-
- return mqd_mgr;
-}
-
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
@@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_active)
continue;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr) { /* should not be here */
- pr_err("Cannot evict queue, mqd mgr is NULL\n");
- retval = -ENOMEM;
- goto out;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
q->properties.is_evicted = true;
q->properties.is_active = false;
retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
@@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (!q->properties.is_evicted)
continue;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr) { /* should not be here */
- pr_err("Cannot restore queue, mqd mgr is NULL\n");
- retval = -ENOMEM;
- goto out;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
q->properties.is_evicted = false;
q->properties.is_active = true;
retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
@@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
retval = dqm->asic_ops.update_qpd(dqm, qpd);
dqm->processes_count++;
- kfd_inc_compute_active(dqm->dev);
dqm_unlock(dqm);
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ kfd_inc_compute_active(dqm->dev);
+
return retval;
}
@@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
goto out;
}
}
@@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
retval = 1;
out:
dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (!retval)
+ kfd_dec_compute_active(dqm->dev);
+
return retval;
}
@@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
+ dqm->xgmi_sdma_queue_count = 0;
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
}
dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
- dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
+ dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
return 0;
}
@@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
}
static int allocate_sdma_queue(struct device_queue_manager *dqm,
- unsigned int *sdma_queue_id)
+ struct queue *q)
{
int bit;
- if (dqm->sdma_bitmap == 0)
- return -ENOMEM;
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ if (dqm->sdma_bitmap == 0)
+ return -ENOMEM;
+ bit = __ffs64(dqm->sdma_bitmap);
+ dqm->sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ q->properties.sdma_engine_id = q->sdma_id %
+ get_num_sdma_engines(dqm);
+ q->properties.sdma_queue_id = q->sdma_id /
+ get_num_sdma_engines(dqm);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ if (dqm->xgmi_sdma_bitmap == 0)
+ return -ENOMEM;
+ bit = __ffs64(dqm->xgmi_sdma_bitmap);
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ /* sdma_engine_id is sdma id including
+ * both PCIe-optimized SDMAs and XGMI-
+ * optimized SDMAs. The calculation below
+ * assumes the first N engines are always
+ * PCIe-optimized ones
+ */
+ q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
+ q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+ q->properties.sdma_queue_id = q->sdma_id /
+ get_num_xgmi_sdma_engines(dqm);
+ }
- bit = ffs(dqm->sdma_bitmap) - 1;
- dqm->sdma_bitmap &= ~(1 << bit);
- *sdma_queue_id = bit;
+ pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
+ pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
return 0;
}
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
- unsigned int sdma_queue_id)
+ struct queue *q)
{
- if (sdma_queue_id >= get_num_sdma_queues(dqm))
- return;
- dqm->sdma_bitmap |= (1 << sdma_queue_id);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ if (q->sdma_id >= get_num_sdma_queues(dqm))
+ return;
+ dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+ return;
+ dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+ }
}
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
int retval;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
- if (!mqd_mgr)
- return -ENOMEM;
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
- retval = allocate_sdma_queue(dqm, &q->sdma_id);
+ retval = allocate_sdma_queue(dqm, q);
if (retval)
return retval;
- q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
- q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
-
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
- pr_debug("SDMA id is: %d\n", q->sdma_id);
- pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
- pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
-
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
@@ -987,7 +1002,7 @@ out_uninit_mqd:
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
- deallocate_sdma_queue(dqm, q->sdma_id);
+ deallocate_sdma_queue(dqm, q);
return retval;
}
@@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->processes_count = 0;
dqm->sdma_queue_count = 0;
+ dqm->xgmi_sdma_queue_count = 0;
dqm->active_runlist = false;
- dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
+ dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+ dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
@@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd_mgr;
- retval = 0;
-
- dqm_lock(dqm);
-
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
retval = -EPERM;
- goto out_unlock;
+ goto out;
}
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- retval = allocate_sdma_queue(dqm, &q->sdma_id);
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ retval = allocate_sdma_queue(dqm, q);
if (retval)
- goto out_unlock;
- q->properties.sdma_queue_id =
- q->sdma_id / get_num_sdma_engines(dqm);
- q->properties.sdma_engine_id =
- q->sdma_id % get_num_sdma_engines(dqm);
+ goto out;
}
retval = allocate_doorbell(qpd, q);
if (retval)
goto out_deallocate_sdma_queue;
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
-
- if (!mqd_mgr) {
- retval = -ENOMEM;
- goto out_deallocate_doorbell;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
-
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (retval)
goto out_deallocate_doorbell;
+ dqm_lock(dqm);
+
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
@@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ dqm->xgmi_sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
* type or whether the queue is active.
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
out_deallocate_doorbell:
deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
- dqm_unlock(dqm);
-
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ deallocate_sdma_queue(dqm, q);
+out:
return retval;
}
@@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
return 0;
}
-static int unmap_sdma_queues(struct device_queue_manager *dqm,
- unsigned int sdma_engine)
+static int unmap_sdma_queues(struct device_queue_manager *dqm)
{
- return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
- KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
- sdma_engine);
+ int i, retval = 0;
+
+ for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
+ dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
+ retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+ KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
+ if (retval)
+ return retval;
+ }
+ return retval;
}
/* dqm->lock mutex has to be locked before calling this function */
@@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
return retval;
- pr_debug("Before destroying queues, sdma queue count is : %u\n",
- dqm->sdma_queue_count);
+ pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
+ dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
- if (dqm->sdma_queue_count > 0) {
- unmap_sdma_queues(dqm, 0);
- unmap_sdma_queues(dqm, 1);
- }
+ if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
+ unmap_sdma_queues(dqm);
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
filter, filter_param, false, 0);
@@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
}
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr) {
- retval = -ENOMEM;
- goto failed;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
deallocate_doorbell(qpd, q);
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q->sdma_id);
+ deallocate_sdma_queue(dqm, q);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ dqm->xgmi_sdma_queue_count--;
+ deallocate_sdma_queue(dqm, q);
}
list_del(&q->list);
@@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = true;
}
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
/*
* Unconditionally decrement this counter, regardless of the queue's
* type
@@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
+ /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+ mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
return retval;
-failed:
failed_try_destroy_debugged_queue:
dqm_unlock(dqm);
@@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct queue *q, *next;
struct device_process_node *cur, *next_dpn;
int retval = 0;
+ bool found = false;
dqm_lock(dqm);
@@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
+ found = true;
break;
}
}
dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (found)
+ kfd_dec_compute_active(dqm->dev);
+
return retval;
}
@@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
goto dqm_unlock;
}
- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
- if (!mqd_mgr) {
- r = -ENOMEM;
- goto dqm_unlock;
- }
+ mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
@@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
struct device_process_node *cur, *next_dpn;
enum kfd_unmap_queues_filter filter =
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+ bool found = false;
retval = 0;
@@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
- deallocate_sdma_queue(dqm, q->sdma_id);
+ deallocate_sdma_queue(dqm, q);
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ dqm->xgmi_sdma_queue_count--;
+ deallocate_sdma_queue(dqm, q);
}
if (q->properties.is_active)
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
list_del(&cur->list);
kfree(cur);
dqm->processes_count--;
- kfd_dec_compute_active(dqm->dev);
+ found = true;
break;
}
}
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
- /* lastly, free mqd resources */
+ dqm_unlock(dqm);
+
+ /* Outside the DQM lock because under the DQM lock we can't do
+ * reclaim or take other locks that others hold while reclaiming.
+ */
+ if (found)
+ kfd_dec_compute_active(dqm->dev);
+
+ /* Lastly, free mqd resources.
+ * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+ */
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
- mqd_mgr = dqm->ops.get_mqd_manager(dqm,
- get_mqd_type_from_queue_type(q->properties.type));
- if (!mqd_mgr) {
- retval = -ENOMEM;
- goto out;
- }
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
list_del(&q->list);
qpd->queue_count--;
mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
}
-out:
- dqm_unlock(dqm);
+ return retval;
+}
+
+static int init_mqd_managers(struct device_queue_manager *dqm)
+{
+ int i, j;
+ struct mqd_manager *mqd_mgr;
+
+ for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
+ mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
+ if (!mqd_mgr) {
+ pr_err("mqd manager [%d] initialization failed\n", i);
+ goto out_free;
+ }
+ dqm->mqd_mgrs[i] = mqd_mgr;
+ }
+
+ return 0;
+
+out_free:
+ for (j = 0; j < i; j++) {
+ kfree(dqm->mqd_mgrs[j]);
+ dqm->mqd_mgrs[j] = NULL;
+ }
+
+ return -ENOMEM;
+}
+
+/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
+static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
+{
+ int retval;
+ struct kfd_dev *dev = dqm->dev;
+ struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
+ uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
+ dev->device_info->num_sdma_engines *
+ dev->device_info->num_sdma_queues_per_engine +
+ dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+ retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
+ &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
+ (void *)&(mem_obj->cpu_ptr), true);
+
return retval;
}
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.stop = stop_cpsch;
dqm->ops.destroy_queue = destroy_queue_cpsch;
dqm->ops.update_queue = update_queue;
- dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.uninitialize = uninitialize;
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.create_queue = create_queue_nocpsch;
dqm->ops.destroy_queue = destroy_queue_nocpsch;
dqm->ops.update_queue = update_queue;
- dqm->ops.get_mqd_manager = get_mqd_manager;
dqm->ops.register_process = register_process;
dqm->ops.unregister_process = unregister_process;
dqm->ops.initialize = initialize_nocpsch;
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
goto out_free;
}
+ if (init_mqd_managers(dqm))
+ goto out_free;
+
+ if (allocate_hiq_sdma_mqd(dqm)) {
+ pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+ goto out_free;
+ }
+
if (!dqm->ops.initialize(dqm))
return dqm;
@@ -1772,9 +1843,17 @@ out_free:
return NULL;
}
+void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
+{
+ WARN(!mqd, "No hiq sdma mqd trunk to free");
+
+ amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
+}
+
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
dqm->ops.uninitialize(dqm);
+ deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
kfree(dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 70e38a2e23b9..88b4c007696e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -48,8 +48,6 @@ struct device_process_node {
*
* @update_queue: Queue update routine.
*
- * @get_mqd_manager: Returns the mqd manager according to the mqd type.
- *
* @exeute_queues: Dispatches the queues list to the H/W.
*
* @register_process: This routine associates a specific process with device.
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
- struct mqd_manager * (*get_mqd_manager)
- (struct device_queue_manager *dqm,
- enum KFD_MQD_TYPE type);
-
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
void (*init_sdma_vm)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd);
+ struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
};
/**
@@ -185,10 +181,12 @@ struct device_queue_manager {
unsigned int processes_count;
unsigned int queue_count;
unsigned int sdma_queue_count;
+ unsigned int xgmi_sdma_queue_count;
unsigned int total_queue_count;
unsigned int next_pipe_to_allocate;
unsigned int *allocated_queues;
- unsigned int sdma_bitmap;
+ uint64_t sdma_bitmap;
+ uint64_t xgmi_sdma_bitmap;
unsigned int vmid_bitmap;
uint64_t pipelines_addr;
struct kfd_mem_obj *pipeline_mem;
@@ -201,6 +199,7 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
struct work_struct hw_exception_work;
+ struct kfd_mem_obj hiq_sdma_mqd;
};
void device_queue_manager_init_cik(
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index aed4c21417bf..0d26506798cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik;
asic_ops->init_sdma_vm = init_sdma_vm;
+ asic_ops->mqd_manager_init = mqd_manager_init_cik;
}
void device_queue_manager_init_cik_hawaii(
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
asic_ops->update_qpd = update_qpd_cik_hawaii;
asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
+ asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 417515332c35..e9fe39382371 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
{
asic_ops->update_qpd = update_qpd_v9;
asic_ops->init_sdma_vm = init_sdma_vm_v9;
+ asic_ops->mqd_manager_init = mqd_manager_init_v9;
}
static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index c3a5dcfe877a..3a7cb2f88366 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
asic_ops->update_qpd = update_qpd_vi;
asic_ops->init_sdma_vm = init_sdma_vm;
+ asic_ops->mqd_manager_init = mqd_manager_init_vi;
}
void device_queue_manager_init_vi_tonga(
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
asic_ops->update_qpd = update_qpd_vi_tonga;
asic_ops->init_sdma_vm = init_sdma_vm_tonga;
+ asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6e1d41c5bf86..d674d4b3340f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
return; /* Presumably process exited. */
memset(&memory_exception_data, 0, sizeof(memory_exception_data));
memory_exception_data.gpu_id = dev->id;
- memory_exception_data.failure.imprecise = 1;
+ memory_exception_data.failure.imprecise = true;
/* Set failure reason */
if (info) {
memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 213ea5454d11..22a8e88b6a67 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
break;
case CHIP_VEGA10:
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
return 0;
}
-
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index f1596881f20a..1cc03b3ddbb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->nop_packet = nop.u32all;
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
+ kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
+ break;
case KFD_QUEUE_TYPE_HIQ:
- kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
- KFD_MQD_TYPE_HIQ);
+ kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
break;
default:
pr_err("Invalid queue type %d\n", type);
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
kernel_queue_init_vi(&kq->ops_asic_specific);
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 33830b1a5a54..07f02f8e4fe4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
packet->bitfields2.queue_type =
queue_type__mes_map_queues__normal_compute_vi;
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index bf20c6d32ef3..2adaf40027eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
packet->bitfields2.queue_sel =
queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
queue_type__mes_map_queues__debug_interface_queue_vi;
break;
case KFD_QUEUE_TYPE_SDMA:
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
engine_sel__mes_map_queues__sdma0_vi;
use_static = false; /* no static queues under SDMA */
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
engine_sel__mes_unmap_queues__compute;
break;
case KFD_QUEUE_TYPE_SDMA:
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
packet->bitfields2.engine_sel =
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index aed9b9b82213..9307811bc427 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -23,34 +23,54 @@
#include "kfd_mqd_manager.h"
#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
{
- switch (dev->device_info->asic_family) {
- case CHIP_KAVERI:
- return mqd_manager_init_cik(type, dev);
- case CHIP_HAWAII:
- return mqd_manager_init_cik_hawaii(type, dev);
- case CHIP_CARRIZO:
- return mqd_manager_init_vi(type, dev);
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- return mqd_manager_init_vi_tonga(type, dev);
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_RAVEN:
- return mqd_manager_init_v9(type, dev);
- default:
- WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
- }
+ struct kfd_mem_obj *mqd_mem_obj = NULL;
+
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ if (!mqd_mem_obj)
+ return NULL;
+
+ mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+ mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
+ mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
+
+ return mqd_mem_obj;
+}
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+ struct queue_properties *q)
+{
+ struct kfd_mem_obj *mqd_mem_obj = NULL;
+ uint64_t offset;
- return NULL;
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ if (!mqd_mem_obj)
+ return NULL;
+
+ offset = (q->sdma_engine_id *
+ dev->device_info->num_sdma_queues_per_engine +
+ q->sdma_queue_id) *
+ dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+
+ offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+ mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ + offset);
+ mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+ mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
+ dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+
+ return mqd_mem_obj;
+}
+
+void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ WARN_ON(!mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
}
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index f8261313ae7b..56af256a191b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -99,8 +99,16 @@ struct mqd_manager {
struct mutex mqd_mutex;
struct kfd_dev *dev;
+ uint32_t mqd_size;
};
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+ struct queue_properties *q);
+void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj);
+
void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
const uint32_t *cu_mask, uint32_t cu_mask_count,
uint32_t *se_mask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index ae90a99909ef..6e8509ec29d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ struct queue_properties *q)
+{
+ struct kfd_mem_obj *mqd_mem_obj;
+
+ if (q->type == KFD_QUEUE_TYPE_HIQ)
+ return allocate_hiq_mqd(kfd);
+
+ if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
+ &mqd_mem_obj))
+ return NULL;
+
+ return mqd_mem_obj;
+}
+
+
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
uint64_t addr;
struct cik_mqd *m;
int retval;
+ struct kfd_dev *kfd = mm->dev;
- retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
- mqd_mem_obj);
-
- if (retval != 0)
+ *mqd_mem_obj = allocate_mqd(kfd, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct cik_sdma_rlc_registers *m;
+ struct kfd_dev *dev = mm->dev;
- retval = kfd_gtt_sa_allocate(mm->dev,
- sizeof(struct cik_sdma_rlc_registers),
- mqd_mem_obj);
-
- if (retval != 0)
+ *mqd_mem_obj = allocate_sdma_mqd(dev, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
uint32_t queue_id, struct queue_properties *p,
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct cik_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct cik_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
- mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 9dbba609450e..4750338199b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
-static int init_mqd(struct mqd_manager *mm, void **mqd,
- struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
- struct queue_properties *q)
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ struct queue_properties *q)
{
int retval;
- uint64_t addr;
- struct v9_mqd *m;
- struct kfd_dev *kfd = mm->dev;
+ struct kfd_mem_obj *mqd_mem_obj = NULL;
+
+ if (q->type == KFD_QUEUE_TYPE_HIQ)
+ return allocate_hiq_mqd(kfd);
/* From V9, for CWSR, the control stack is located on the next page
* boundary after the mqd, we will use the gtt allocation function
* instead of sub-allocation function.
*/
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
- *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
- if (!*mqd_mem_obj)
- return -ENOMEM;
+ mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+ if (!mqd_mem_obj)
+ return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
- &((*mqd_mem_obj)->gtt_mem),
- &((*mqd_mem_obj)->gpu_addr),
- (void *)&((*mqd_mem_obj)->cpu_ptr), true);
- } else
- retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
- mqd_mem_obj);
- if (retval != 0)
+ &(mqd_mem_obj->gtt_mem),
+ &(mqd_mem_obj->gpu_addr),
+ (void *)&(mqd_mem_obj->cpu_ptr), true);
+ } else {
+ retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+ &mqd_mem_obj);
+ }
+
+ if (retval) {
+ kfree(mqd_mem_obj);
+ return NULL;
+ }
+
+ return mqd_mem_obj;
+
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct v9_mqd *m;
+ struct kfd_dev *kfd = mm->dev;
+
+ *mqd_mem_obj = allocate_mqd(kfd, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct v9_sdma_mqd *m;
+ struct kfd_dev *dev = mm->dev;
-
- retval = kfd_gtt_sa_allocate(mm->dev,
- sizeof(struct v9_sdma_mqd),
- mqd_mem_obj);
-
- if (retval != 0)
+ *mqd_mem_obj = allocate_sdma_mqd(dev, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
+ mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct v9_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct v9_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
- mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct v9_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 6469b3456f00..b550dea9b10a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
}
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+ struct queue_properties *q)
+{
+ struct kfd_mem_obj *mqd_mem_obj;
+
+ if (q->type == KFD_QUEUE_TYPE_HIQ)
+ return allocate_hiq_mqd(kfd);
+
+ if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
+ &mqd_mem_obj))
+ return NULL;
+
+ return mqd_mem_obj;
+}
+
static int init_mqd(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
int retval;
uint64_t addr;
struct vi_mqd *m;
+ struct kfd_dev *kfd = mm->dev;
- retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
- mqd_mem_obj);
- if (retval != 0)
+ *mqd_mem_obj = allocate_mqd(kfd, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
{
int retval;
struct vi_sdma_mqd *m;
+ struct kfd_dev *dev = mm->dev;
-
- retval = kfd_gtt_sa_allocate(mm->dev,
- sizeof(struct vi_sdma_mqd),
- mqd_mem_obj);
-
- if (retval != 0)
+ *mqd_mem_obj = allocate_sdma_mqd(dev, q);
+ if (!*mqd_mem_obj)
return -ENOMEM;
m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
memset(m, 0, sizeof(struct vi_sdma_mqd));
*mqd = m;
- if (gart_addr != NULL)
+ if (gart_addr)
*gart_addr = (*mqd_mem_obj)->gpu_addr;
retval = mm->update_mqd(mm, m, q);
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
return retval;
}
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct kfd_mem_obj *mqd_mem_obj)
-{
- kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->get_wave_state = get_wave_state;
+ mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_HIQ:
mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct vi_mqd);
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_DIQ:
+ mqd->init_mqd = init_mqd_hiq;
mqd->uninit_mqd = uninit_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd_hiq;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
+ mqd->mqd_size = sizeof(struct vi_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
break;
case KFD_MQD_TYPE_SDMA:
mqd->init_mqd = init_mqd_sdma;
- mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_hiq_sdma;
mqd->load_mqd = load_mqd_sdma;
mqd->update_mqd = update_mqd_sdma;
mqd->destroy_mqd = destroy_mqd_sdma;
mqd->is_occupied = is_occupied_sdma;
+ mqd->mqd_size = sizeof(struct vi_sdma_mqd);
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 045a229436a0..808194663a7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count;
- compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
+ compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
+ pm->dqm->xgmi_sdma_queue_count;
/* check if there is over subscription
* Note: the arbitration between the number of VMIDs and
@@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
pm->pmf = &kfd_vi_pm_funcs;
break;
case CHIP_VEGA10:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index f2bcf5c092ea..49ab66b703fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
union {
struct {
- uint32_t num_gws:6;
- uint32_t reserved7:1;
+ uint32_t num_gws:7;
uint32_t sdma_enable:1;
uint32_t num_oac:4;
uint32_t reserved8:4;
@@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};
-enum mes_map_queues_alloc_format_enum {
- alloc_format__mes_map_queues__one_per_pipe_vi = 0,
-alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
-};
-
enum mes_map_queues_engine_sel_enum {
engine_sel__mes_map_queues__compute_vi = 0,
engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
struct {
uint32_t reserved1:4;
enum mes_map_queues_queue_sel_enum queue_sel:2;
- uint32_t reserved2:15;
+ uint32_t reserved5:6;
+ uint32_t gws_control_queue:1;
+ uint32_t reserved2:8;
enum mes_map_queues_queue_type_enum queue_type:3;
- enum mes_map_queues_alloc_format_enum alloc_format:2;
+ uint32_t reserved3:2;
enum mes_map_queues_engine_sel_enum engine_sel:3;
uint32_t num_queues:3;
} bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 7c8d9b357749..5466cfe1c3cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
queue_type__mes_map_queues__low_latency_static_queue_vi = 3
};
-enum mes_map_queues_alloc_format_vi_enum {
- alloc_format__mes_map_queues__one_per_pipe_vi = 0,
-alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
-};
-
enum mes_map_queues_engine_sel_vi_enum {
engine_sel__mes_map_queues__compute_vi = 0,
engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
uint32_t reserved2:15;
enum mes_map_queues_queue_type_vi_enum queue_type:3;
- enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
+ uint32_t reserved3:2;
enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
uint32_t num_queues:3;
} bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 487d5da337c1..b61dc53f42d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,6 +59,7 @@
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
@@ -160,6 +161,11 @@ extern int noretry;
*/
extern int halt_if_hws_hang;
+/*
+ * Whether MEC FW support GWS barriers
+ */
+extern bool hws_gws_support;
+
enum cache_policy {
cache_policy_coherent,
cache_policy_noncoherent
@@ -188,6 +194,7 @@ struct kfd_device_info {
bool needs_iommu_device;
bool needs_pci_atomics;
unsigned int num_sdma_engines;
+ unsigned int num_xgmi_sdma_engines;
unsigned int num_sdma_queues_per_engine;
};
@@ -258,7 +265,7 @@ struct kfd_dev {
bool interrupts_active;
/* Debug manager */
- struct kfd_dbgmgr *dbgmgr;
+ struct kfd_dbgmgr *dbgmgr;
/* Firmware versions */
uint16_t mec_fw_version;
@@ -282,6 +289,9 @@ struct kfd_dev {
/* Compute Profile ref. count */
atomic_t compute_profile;
+
+ /* Global GWS resource shared b/t processes*/
+ void *gws;
};
enum kfd_mempool {
@@ -329,7 +339,8 @@ enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
KFD_QUEUE_TYPE_HIQ,
- KFD_QUEUE_TYPE_DIQ
+ KFD_QUEUE_TYPE_DIQ,
+ KFD_QUEUE_TYPE_SDMA_XGMI
};
enum kfd_queue_format {
@@ -444,6 +455,9 @@ struct queue_properties {
*
* @device: The kfd device that created this queue.
*
+ * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
+ * otherwise.
+ *
* This structure represents user mode compute queues.
* It contains all the necessary data to handle such queues.
*
@@ -465,6 +479,7 @@ struct queue {
struct kfd_process *process;
struct kfd_dev *device;
+ void *gws;
};
/*
@@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
KFD_MQD_TYPE_HIQ, /* for hiq */
KFD_MQD_TYPE_CP, /* for cp queues and diq */
KFD_MQD_TYPE_SDMA, /* for sdma queues */
+ KFD_MQD_TYPE_DIQ, /* for diq */
KFD_MQD_TYPE_MAX
};
@@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
@@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+ void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index fcaaf93681ac..c2c570e6e54f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -26,6 +26,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
static inline struct process_queue_node *get_queue_by_qid(
struct process_queue_manager *pqm, unsigned int qid)
@@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
pdd->already_dequeued = true;
}
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+ void *gws)
+{
+ struct kfd_dev *dev = NULL;
+ struct process_queue_node *pqn;
+ struct kfd_process_device *pdd;
+ struct kgd_mem *mem = NULL;
+ int ret;
+
+ pqn = get_queue_by_qid(pqm, qid);
+ if (!pqn) {
+ pr_err("Queue id does not match any known queue\n");
+ return -EINVAL;
+ }
+
+ if (pqn->q)
+ dev = pqn->q->device;
+ if (WARN_ON(!dev))
+ return -ENODEV;
+
+ pdd = kfd_get_process_device_data(dev, pqm->process);
+ if (!pdd) {
+ pr_err("Process device data doesn't exist\n");
+ return -EINVAL;
+ }
+
+ /* Only allow one queue per process can have GWS assigned */
+ if (gws && pdd->qpd.num_gws)
+ return -EINVAL;
+
+ if (!gws && pdd->qpd.num_gws == 0)
+ return -EINVAL;
+
+ if (gws)
+ ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+ gws, &mem);
+ else
+ ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+ pqn->q->gws);
+ if (unlikely(ret))
+ return ret;
+
+ pqn->q->gws = mem;
+ pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+
+ return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+ pqn->q);
+}
+
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
struct kfd_process_device *pdd;
@@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
switch (type) {
case KFD_QUEUE_TYPE_SDMA:
- if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
- pr_err("Over-subscription is not allowed for SDMA.\n");
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
+ if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
+ >= get_num_sdma_queues(dev->dqm)) ||
+ (type == KFD_QUEUE_TYPE_SDMA_XGMI &&
+ dev->dqm->xgmi_sdma_queue_count
+ >= get_num_xgmi_sdma_queues(dev->dqm))) {
+ pr_debug("Over-subscription is not allowed for SDMA.\n");
retval = -EPERM;
goto err_create_queue;
}
@@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
if (retval != -ETIME)
goto err_destroy_queue;
}
+
+ if (pqn->q->gws) {
+ amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
+ pqn->q->gws);
+ pdd->qpd.num_gws = 0;
+ }
+
kfree(pqn->q->properties.cu_mask);
pqn->q->properties.cu_mask = NULL;
uninit_queue(pqn->q);
@@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q = pqn->q;
switch (q->properties.type) {
case KFD_QUEUE_TYPE_SDMA:
+ case KFD_QUEUE_TYPE_SDMA_XGMI:
seq_printf(m, " SDMA queue on device %x\n",
q->device->id);
mqd_type = KFD_MQD_TYPE_SDMA;
@@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
q->properties.type, q->device->id);
continue;
}
- mqd_mgr = q->device->dqm->ops.get_mqd_manager(
- q->device->dqm, mqd_type);
+ mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
} else if (pqn->kq) {
q = pqn->kq->queue;
mqd_mgr = pqn->kq->mqd_mgr;
@@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
case KFD_QUEUE_TYPE_DIQ:
seq_printf(m, " DIQ on device %x\n",
pqn->kq->dev->id);
- mqd_type = KFD_MQD_TYPE_HIQ;
break;
default:
seq_printf(m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 769dbc7be8cb..d241a8672599 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.lds_size_in_kb);
sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
dev->node_props.gds_size_in_kb);
+ sysfs_show_32bit_prop(buffer, "num_gws",
+ dev->node_props.num_gws);
sysfs_show_32bit_prop(buffer, "wave_front_size",
dev->node_props.wave_front_size);
sysfs_show_32bit_prop(buffer, "array_count",
@@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.drm_render_minor);
sysfs_show_64bit_prop(buffer, "hive_id",
dev->node_props.hive_id);
+ sysfs_show_32bit_prop(buffer, "num_sdma_engines",
+ dev->node_props.num_sdma_engines);
+ sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
+ dev->node_props.num_sdma_xgmi_engines);
if (dev->gpu) {
log_max_watch_addr =
@@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
local_mem_info.local_mem_size_public;
buf[0] = gpu->pdev->devfn;
- buf[1] = gpu->pdev->subsystem_vendor;
- buf[2] = gpu->pdev->subsystem_device;
+ buf[1] = gpu->pdev->subsystem_vendor |
+ (gpu->pdev->subsystem_device << 16);
+ buf[2] = pci_domain_nr(gpu->pdev->bus);
buf[3] = gpu->pdev->device;
buf[4] = gpu->pdev->bus->number;
buf[5] = lower_32_bits(local_mem_size);
@@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
gpu->shared_resources.drm_render_minor;
dev->node_props.hive_id = gpu->hive_id;
+ dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+ dev->node_props.num_sdma_xgmi_engines =
+ gpu->device_info->num_xgmi_sdma_engines;
+ dev->node_props.num_gws = (hws_gws_support &&
+ dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+ amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
kfd_fill_mem_clk_max_info(dev);
kfd_fill_iolink_non_crat_info(dev);
@@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
pr_debug("Adding doorbell packet type capability\n");
dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 84710cfd23c2..276354aa0fcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -65,6 +65,7 @@ struct kfd_node_properties {
uint32_t max_waves_per_simd;
uint32_t lds_size_in_kb;
uint32_t gds_size_in_kb;
+ uint32_t num_gws;
uint32_t wave_front_size;
uint32_t array_count;
uint32_t simd_arrays_per_engine;
@@ -78,6 +79,8 @@ struct kfd_node_properties {
uint32_t max_engine_clk_fcompute;
uint32_t max_engine_clk_ccompute;
int32_t drm_render_minor;
+ uint32_t num_sdma_engines;
+ uint32_t num_sdma_xgmi_engines;
uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
};
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0c25baded852..5c826faae240 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,6 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
- select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
@@ -17,11 +16,6 @@ config DRM_AMD_DC_DCN1_0
help
RV family support for display engine
-config DRM_AMD_DC_DCN1_01
- def_bool n
- help
- RV2 family for display engine
-
config DEBUG_KERNEL_DC
bool "Enable kgdb break in DC"
depends on DRM_AMD_DC
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 995f9df66142..53b76e0de940 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -29,6 +29,7 @@
#include "dm_services_types.h"
#include "dc.h"
#include "dc/inc/core_types.h"
+#include "dal_asic_id.h"
#include "vid.h"
#include "amdgpu.h"
@@ -615,6 +616,10 @@ error:
static void amdgpu_dm_fini(struct amdgpu_device *adev)
{
amdgpu_dm_destroy_drm_device(&adev->dm);
+
+ /* DC Destroy TODO: Replace destroy DAL */
+ if (adev->dm.dc)
+ dc_destroy(&adev->dm.dc);
/*
* TODO: pageflip, vlank interrupt
*
@@ -629,9 +634,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
mod_freesync_destroy(adev->dm.freesync_module);
adev->dm.freesync_module = NULL;
}
- /* DC Destroy TODO: Replace destroy DAL */
- if (adev->dm.dc)
- dc_destroy(&adev->dm.dc);
mutex_destroy(&adev->dm.dc_lock);
@@ -640,7 +642,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
static int load_dmcu_fw(struct amdgpu_device *adev)
{
- const char *fw_name_dmcu;
+ const char *fw_name_dmcu = NULL;
int r;
const struct dmcu_firmware_header_v1_0 *hdr;
@@ -663,7 +665,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
case CHIP_VEGA20:
return 0;
case CHIP_RAVEN:
- fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
+#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
+ if (ASICREV_IS_PICASSO(adev->external_rev_id))
+ fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
+ else if (ASICREV_IS_RAVEN2(adev->external_rev_id))
+ fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
+ else
+#endif
+ return 0;
break;
default:
DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
@@ -2584,7 +2593,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
address->type = PLN_ADDR_TYPE_GRAPHICS;
address->grph.addr.low_part = lower_32_bits(afb->address);
address->grph.addr.high_part = upper_32_bits(afb->address);
- } else {
+ } else if (format < SURFACE_PIXEL_FORMAT_INVALID) {
uint64_t chroma_addr = afb->address + fb->offsets[1];
plane_size->video.luma_size.x = 0;
@@ -2959,16 +2968,16 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
}
static enum dc_color_depth
-convert_color_depth_from_display_info(const struct drm_connector *connector)
+convert_color_depth_from_display_info(const struct drm_connector *connector,
+ const struct drm_connector_state *state)
{
- struct dm_connector_state *dm_conn_state =
- to_dm_connector_state(connector->state);
uint32_t bpc = connector->display_info.bpc;
- /* TODO: Remove this when there's support for max_bpc in drm */
- if (dm_conn_state && bpc > dm_conn_state->max_bpc)
- /* Round down to nearest even number. */
- bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1);
+ if (state) {
+ bpc = state->max_bpc;
+ /* Round down to the nearest even number. */
+ bpc = bpc - (bpc & 1);
+ }
switch (bpc) {
case 0:
@@ -3086,11 +3095,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_
}
-static void
-fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
- const struct drm_display_mode *mode_in,
- const struct drm_connector *connector,
- const struct dc_stream_state *old_stream)
+static void fill_stream_properties_from_drm_display_mode(
+ struct dc_stream_state *stream,
+ const struct drm_display_mode *mode_in,
+ const struct drm_connector *connector,
+ const struct drm_connector_state *connector_state,
+ const struct dc_stream_state *old_stream)
{
struct dc_crtc_timing *timing_out = &stream->timing;
const struct drm_display_info *info = &connector->display_info;
@@ -3113,7 +3123,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;
timing_out->display_color_depth = convert_color_depth_from_display_info(
- connector);
+ connector, connector_state);
timing_out->scan_type = SCANNING_TYPE_NODATA;
timing_out->hdmi_vic = 0;
@@ -3310,6 +3320,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
{
struct drm_display_mode *preferred_mode = NULL;
struct drm_connector *drm_connector;
+ const struct drm_connector_state *con_state =
+ dm_state ? &dm_state->base : NULL;
struct dc_stream_state *stream = NULL;
struct drm_display_mode mode = *drm_mode;
bool native_mode_found = false;
@@ -3382,10 +3394,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
*/
if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(stream,
- &mode, &aconnector->base, NULL);
+ &mode, &aconnector->base, con_state, NULL);
else
fill_stream_properties_from_drm_display_mode(stream,
- &mode, &aconnector->base, old_stream);
+ &mode, &aconnector->base, con_state, old_stream);
update_stream_scaling_settings(&mode, dm_state, stream);
@@ -3610,9 +3622,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
} else if (property == adev->mode_info.underscan_property) {
dm_new_state->underscan_enable = val;
ret = 0;
- } else if (property == adev->mode_info.max_bpc_property) {
- dm_new_state->max_bpc = val;
- ret = 0;
} else if (property == adev->mode_info.abm_level_property) {
dm_new_state->abm_level = val;
ret = 0;
@@ -3658,9 +3667,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
} else if (property == adev->mode_info.underscan_property) {
*val = dm_state->underscan_enable;
ret = 0;
- } else if (property == adev->mode_info.max_bpc_property) {
- *val = dm_state->max_bpc;
- ret = 0;
} else if (property == adev->mode_info.abm_level_property) {
*val = dm_state->abm_level;
ret = 0;
@@ -3717,7 +3723,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
state->underscan_enable = false;
state->underscan_hborder = 0;
state->underscan_vborder = 0;
- state->max_bpc = 8;
__drm_atomic_helper_connector_reset(connector, &state->base);
}
@@ -3743,7 +3748,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
new_state->underscan_enable = state->underscan_enable;
new_state->underscan_hborder = state->underscan_hborder;
new_state->underscan_vborder = state->underscan_vborder;
- new_state->max_bpc = state->max_bpc;
return &new_state->base;
}
@@ -4585,6 +4589,15 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
amdgpu_dm_connector->num_modes =
drm_add_edid_modes(connector, edid);
+ /* sorting the probed modes before calling function
+ * amdgpu_dm_get_native_mode() since EDID can have
+ * more than one preferred mode. The modes that are
+ * later in the probed mode list could be of higher
+ * and preferred resolution. For example, 3840x2160
+ * resolution in base EDID preferred timing and 4096x2160
+ * preferred resolution in DID extension block later.
+ */
+ drm_mode_sort(&connector->probed_modes);
amdgpu_dm_get_native_mode(connector);
} else {
amdgpu_dm_connector->num_modes = 0;
@@ -4664,9 +4677,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
drm_object_attach_property(&aconnector->base.base,
adev->mode_info.underscan_vborder_property,
0);
- drm_object_attach_property(&aconnector->base.base,
- adev->mode_info.max_bpc_property,
- 0);
+
+ drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
+
+ /* This defaults to the max in the range, but we want 8bpc. */
+ aconnector->base.state->max_bpc = 8;
+ aconnector->base.state->max_requested_bpc = 8;
if (connector_type == DRM_MODE_CONNECTOR_eDP &&
dc_is_dmcu_initialized(adev->dm.dc)) {
@@ -4945,12 +4961,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
int x, y;
int xorigin = 0, yorigin = 0;
- if (!crtc || !plane->state->fb) {
- position->enable = false;
- position->x = 0;
- position->y = 0;
+ position->enable = false;
+ position->x = 0;
+ position->y = 0;
+
+ if (!crtc || !plane->state->fb)
return 0;
- }
if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) ||
(plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) {
@@ -4964,6 +4980,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
x = plane->state->crtc_x;
y = plane->state->crtc_y;
+ if (x <= -amdgpu_crtc->max_cursor_width ||
+ y <= -amdgpu_crtc->max_cursor_height)
+ return 0;
+
if (crtc->primary->state) {
/* avivo cursor are offset into the total surface */
x += crtc->primary->state->src_x >> 16;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 978ff14a7d45..b0ce44422e90 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -304,7 +304,6 @@ struct dm_connector_state {
enum amdgpu_rmx_type scaling;
uint8_t underscan_vborder;
uint8_t underscan_hborder;
- uint8_t max_bpc;
bool underscan_enable;
bool freesync_capable;
uint8_t abm_level;
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index fd5266a58297..12bc7ee66b18 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1313,6 +1313,8 @@ static enum bp_result bios_parser_get_encoder_cap_info(
ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;
info->HDMI_6GB_EN = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0;
+ info->DP_IS_USB_C = (record->encodercaps &
+ ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
return BP_RESULT_OK;
}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 8196f3bb10c7..53deba42007a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
return true;
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case DCN_VERSION_1_0:
- *h = dal_cmd_tbl_helper_dce112_get_table2();
- return true;
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
case DCN_VERSION_1_01:
*h = dal_cmd_tbl_helper_dce112_get_table2();
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 18c775a950cc..4e17af2b63dc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -169,9 +169,14 @@ static bool create_links(
link = link_create(&link_init_params);
if (link) {
- dc->links[dc->link_count] = link;
- link->dc = dc;
- ++dc->link_count;
+ if (dc->config.edp_not_connected &&
+ link->connector_signal == SIGNAL_TYPE_EDP) {
+ link_destroy(&link);
+ } else {
+ dc->links[dc->link_count] = link;
+ link->dc = dc;
+ ++dc->link_count;
+ }
}
}
@@ -1136,10 +1141,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
/* Program all planes within new context*/
for (i = 0; i < context->stream_count; i++) {
const struct dc_link *link = context->streams[i]->link;
- struct dc_stream_status *status;
-
- if (context->streams[i]->apply_seamless_boot_optimization)
- context->streams[i]->apply_seamless_boot_optimization = false;
if (!context->streams[i]->mode_changed)
continue;
@@ -1164,9 +1165,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
}
}
- status = dc_stream_get_status_from_state(context, context->streams[i]);
- context->streams[i]->out.otg_offset = status->primary_otg_inst;
-
CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}",
context->streams[i]->timing.h_addressable,
context->streams[i]->timing.v_addressable,
@@ -1331,71 +1329,94 @@ static bool is_surface_in_context(
static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
{
union surface_update_flags *update_flags = &u->surface->update_flags;
+ enum surface_update_type update_type = UPDATE_TYPE_FAST;
if (!u->plane_info)
return UPDATE_TYPE_FAST;
- if (u->plane_info->color_space != u->surface->color_space)
+ if (u->plane_info->color_space != u->surface->color_space) {
update_flags->bits.color_space_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
- if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
+ if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) {
update_flags->bits.horizontal_mirror_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
- if (u->plane_info->rotation != u->surface->rotation)
+ if (u->plane_info->rotation != u->surface->rotation) {
update_flags->bits.rotation_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ }
- if (u->plane_info->format != u->surface->format)
+ if (u->plane_info->format != u->surface->format) {
update_flags->bits.pixel_format_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ }
- if (u->plane_info->stereo_format != u->surface->stereo_format)
+ if (u->plane_info->stereo_format != u->surface->stereo_format) {
update_flags->bits.stereo_format_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ }
- if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha)
+ if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) {
update_flags->bits.per_pixel_alpha_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
- if (u->plane_info->global_alpha_value != u->surface->global_alpha_value)
+ if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) {
update_flags->bits.global_alpha_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
+
+ if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) {
+ update_flags->bits.sdr_white_level = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
if (u->plane_info->dcc.enable != u->surface->dcc.enable
|| u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks
- || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch)
+ || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) {
update_flags->bits.dcc_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
if (resource_pixel_format_to_bpp(u->plane_info->format) !=
- resource_pixel_format_to_bpp(u->surface->format))
+ resource_pixel_format_to_bpp(u->surface->format)) {
/* different bytes per element will require full bandwidth
* and DML calculation
*/
update_flags->bits.bpp_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ }
if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch
|| u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch
- || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch)
+ || u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) {
update_flags->bits.plane_size_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+ }
if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
sizeof(union dc_tiling_info)) != 0) {
update_flags->bits.swizzle_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_MED);
+
/* todo: below are HW dependent, we should add a hook to
* DCE/N resource and validated there.
*/
- if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR)
+ if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
/* swizzled mode requires RQ to be setup properly,
* thus need to run DML to calculate RQ settings
*/
update_flags->bits.bandwidth_change = 1;
+ elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+ }
}
- if (update_flags->bits.rotation_change
- || update_flags->bits.stereo_format_change
- || update_flags->bits.pixel_format_change
- || update_flags->bits.bpp_change
- || update_flags->bits.bandwidth_change
- || update_flags->bits.output_tf_change)
- return UPDATE_TYPE_FULL;
-
- return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST;
+ /* This should be UPDATE_TYPE_FAST if nothing has changed. */
+ return update_type;
}
static enum surface_update_type get_scaling_info_update_type(
@@ -1475,6 +1496,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
type = get_scaling_info_update_type(u);
elevate_update_type(&overall_type, type);
+ if (u->flip_addr)
+ update_flags->bits.addr_update = 1;
+
if (u->in_transfer_func)
update_flags->bits.in_transfer_func_change = 1;
@@ -1792,10 +1816,15 @@ static void commit_planes_for_stream(struct dc *dc,
if (dc->optimize_seamless_boot && surface_count > 0) {
/* Optimize seamless boot flag keeps clocks and watermarks high until
* first flip. After first flip, optimization is required to lower
- * bandwidth.
+ * bandwidth. Important to note that it is expected UEFI will
+ * only light up a single display on POST, therefore we only expect
+ * one stream with seamless boot flag set.
*/
- dc->optimize_seamless_boot = false;
- dc->optimized_required = true;
+ if (stream->apply_seamless_boot_optimization) {
+ stream->apply_seamless_boot_optimization = false;
+ dc->optimize_seamless_boot = false;
+ dc->optimized_required = true;
+ }
}
if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 83d121510ef5..ca50ede37183 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -45,8 +45,10 @@ enum dc_color_space_type {
COLOR_SPACE_RGB_LIMITED_TYPE,
COLOR_SPACE_YCBCR601_TYPE,
COLOR_SPACE_YCBCR709_TYPE,
+ COLOR_SPACE_YCBCR2020_TYPE,
COLOR_SPACE_YCBCR601_LIMITED_TYPE,
- COLOR_SPACE_YCBCR709_LIMITED_TYPE
+ COLOR_SPACE_YCBCR709_LIMITED_TYPE,
+ COLOR_SPACE_YCBCR709_BLACK_TYPE,
};
static const struct tg_color black_color_format[] = {
@@ -80,7 +82,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
{ COLOR_SPACE_YCBCR709_TYPE,
{ 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
-
/* TODO: correct values below */
{ COLOR_SPACE_YCBCR601_LIMITED_TYPE,
{ 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
@@ -88,6 +89,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
{ COLOR_SPACE_YCBCR709_LIMITED_TYPE,
{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
+ { COLOR_SPACE_YCBCR2020_TYPE,
+ { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
+ 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
+ { COLOR_SPACE_YCBCR709_BLACK_TYPE,
+ { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
+ 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
};
static bool is_rgb_type(
@@ -149,6 +156,16 @@ static bool is_ycbcr709_type(
return ret;
}
+static bool is_ycbcr2020_type(
+ enum dc_color_space color_space)
+{
+ bool ret = false;
+
+ if (color_space == COLOR_SPACE_2020_YCBCR)
+ ret = true;
+ return ret;
+}
+
static bool is_ycbcr709_limited_type(
enum dc_color_space color_space)
{
@@ -174,7 +191,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)
type = COLOR_SPACE_YCBCR601_LIMITED_TYPE;
else if (is_ycbcr709_limited_type(color_space))
type = COLOR_SPACE_YCBCR709_LIMITED_TYPE;
-
+ else if (is_ycbcr2020_type(color_space))
+ type = COLOR_SPACE_YCBCR2020_TYPE;
+ else if (color_space == COLOR_SPACE_YCBCR709)
+ type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
+ else if (color_space == COLOR_SPACE_YCBCR709_BLACK)
+ type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
return type;
}
@@ -206,6 +228,7 @@ void color_space_to_black_color(
switch (colorspace) {
case COLOR_SPACE_YCBCR601:
case COLOR_SPACE_YCBCR709:
+ case COLOR_SPACE_YCBCR709_BLACK:
case COLOR_SPACE_YCBCR601_LIMITED:
case COLOR_SPACE_YCBCR709_LIMITED:
case COLOR_SPACE_2020_YCBCR:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b37ecc3ede61..e7236539f867 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -704,6 +704,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
if (new_connection_type != dc_connection_none) {
link->type = new_connection_type;
+ link->link_state_valid = false;
/* From Disconnected-to-Connected. */
switch (link->connector_signal) {
@@ -906,10 +907,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
/* Connectivity log: detection */
- for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) {
+ for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {
CONN_DATA_DETECT(link,
- &sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE],
- EDID_BLOCK_SIZE,
+ &sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
+ DC_EDID_BLOCK_SIZE,
"%s: [Block %d] ", sink->edid_caps.display_name, i);
}
@@ -2631,6 +2632,8 @@ void core_link_enable_stream(
stream->phy_pix_clk,
pipe_ctx->stream_res.audio != NULL);
+ pipe_ctx->stream->link->link_state_valid = true;
+
if (dc_is_dvi_signal(pipe_ctx->stream->signal))
pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute(
pipe_ctx->stream_res.stream_enc,
@@ -2713,17 +2716,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
{
struct dc *core_dc = pipe_ctx->stream->ctx->dc;
struct dc_stream_state *stream = pipe_ctx->stream;
+ struct dc_link *link = stream->sink->link;
core_dc->hwss.blank_stream(pipe_ctx);
if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
deallocate_mst_payload(pipe_ctx);
- if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
- dal_ddc_service_write_scdc_data(
- stream->link->ddc, 0,
- stream->timing.flags.LTE_340MCSC_SCRAMBLE);
+ if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
+ struct ext_hdmi_settings settings = {0};
+ enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
+ unsigned short masked_chip_caps = link->chip_caps &
+ EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+ //Need to inform that sink is going to use legacy HDMI mode.
+ dal_ddc_service_write_scdc_data(
+ link->ddc,
+ 165000,//vbios only handles 165Mhz.
+ false);
+ if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+ /* DP159, Retimer settings */
+ if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
+ write_i2c_retimer_setting(pipe_ctx,
+ false, false, &settings);
+ else
+ write_i2c_default_retimer_setting(pipe_ctx,
+ false, false);
+ } else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+ /* PI3EQX1204, Redriver settings */
+ write_i2c_redriver_setting(pipe_ctx, false);
+ }
+ }
core_dc->hwss.disable_stream(pipe_ctx, option);
disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index eac7186e4f08..b7952f39f3fc 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -93,10 +93,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case FAMILY_RV:
dc_version = DCN_VERSION_1_0;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))
dc_version = DCN_VERSION_1_01;
-#endif
break;
#endif
default:
@@ -147,9 +145,7 @@ struct resource_pool *dc_create_resource_pool(struct dc *dc,
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case DCN_VERSION_1_0:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
case DCN_VERSION_1_01:
-#endif
res_pool = dcn10_create_resource_pool(init_data, dc);
break;
#endif
@@ -1184,24 +1180,27 @@ static int acquire_first_split_pipe(
int i;
for (i = 0; i < pool->pipe_count; i++) {
- struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
- if (pipe_ctx->top_pipe &&
- pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) {
- pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe;
- if (pipe_ctx->bottom_pipe)
- pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe;
-
- memset(pipe_ctx, 0, sizeof(*pipe_ctx));
- pipe_ctx->stream_res.tg = pool->timing_generators[i];
- pipe_ctx->plane_res.hubp = pool->hubps[i];
- pipe_ctx->plane_res.ipp = pool->ipps[i];
- pipe_ctx->plane_res.dpp = pool->dpps[i];
- pipe_ctx->stream_res.opp = pool->opps[i];
- pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst;
- pipe_ctx->pipe_idx = i;
-
- pipe_ctx->stream = stream;
+ struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i];
+
+ if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) &&
+ split_pipe->top_pipe->plane_state == split_pipe->plane_state) {
+ split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe;
+ if (split_pipe->bottom_pipe)
+ split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe;
+
+ if (split_pipe->top_pipe->plane_state)
+ resource_build_scaling_params(split_pipe->top_pipe);
+
+ memset(split_pipe, 0, sizeof(*split_pipe));
+ split_pipe->stream_res.tg = pool->timing_generators[i];
+ split_pipe->plane_res.hubp = pool->hubps[i];
+ split_pipe->plane_res.ipp = pool->ipps[i];
+ split_pipe->plane_res.dpp = pool->dpps[i];
+ split_pipe->stream_res.opp = pool->opps[i];
+ split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst;
+ split_pipe->pipe_idx = i;
+
+ split_pipe->stream = stream;
return i;
}
}
@@ -1647,46 +1646,6 @@ static int acquire_first_free_pipe(
return -1;
}
-static struct stream_encoder *find_first_free_match_stream_enc_for_link(
- struct resource_context *res_ctx,
- const struct resource_pool *pool,
- struct dc_stream_state *stream)
-{
- int i;
- int j = -1;
- struct dc_link *link = stream->link;
-
- for (i = 0; i < pool->stream_enc_count; i++) {
- if (!res_ctx->is_stream_enc_acquired[i] &&
- pool->stream_enc[i]) {
- /* Store first available for MST second display
- * in daisy chain use case */
- j = i;
- if (pool->stream_enc[i]->id ==
- link->link_enc->preferred_engine)
- return pool->stream_enc[i];
- }
- }
-
- /*
- * below can happen in cases when stream encoder is acquired:
- * 1) for second MST display in chain, so preferred engine already
- * acquired;
- * 2) for another link, which preferred engine already acquired by any
- * MST configuration.
- *
- * If signal is of DP type and preferred engine not found, return last available
- *
- * TODO - This is just a patch up and a generic solution is
- * required for non DP connectors.
- */
-
- if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
- return pool->stream_enc[j];
-
- return NULL;
-}
-
static struct audio *find_first_free_audio(
struct resource_context *res_ctx,
const struct resource_pool *pool,
@@ -1998,7 +1957,7 @@ enum dc_status resource_map_pool_resources(
pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
pipe_ctx->stream_res.stream_enc =
- find_first_free_match_stream_enc_for_link(
+ dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(
&context->res_ctx, pool, stream);
if (!pipe_ctx->stream_res.stream_enc)
@@ -2354,7 +2313,18 @@ static void set_avi_info_frame(
break;
}
}
+ /* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/
hdmi_info.bits.VIC0_VIC7 = vic;
+ if (vic >= 128)
+ hdmi_info.bits.header.version = 3;
+ /* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1),
+ * the Source shall use 20 AVI InfoFrame Version 4
+ */
+ if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED &&
+ hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) {
+ hdmi_info.bits.header.version = 4;
+ hdmi_info.bits.header.length = 14;
+ }
/* pixel repetition
* PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
@@ -2373,12 +2343,19 @@ static void set_avi_info_frame(
hdmi_info.bits.bar_right = (stream->timing.h_total
- stream->timing.h_border_right + 1);
+ /* Additional Colorimetry Extension
+ * Used in conduction with C0-C1 and EC0-EC2
+ * 0 = DCI-P3 RGB (D65)
+ * 1 = DCI-P3 RGB (theater)
+ */
+ hdmi_info.bits.ACE0_ACE3 = 0;
+
/* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */
check_sum = &hdmi_info.packet_raw_data.sb[0];
- *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2;
+ *check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version;
- for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++)
+ for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++)
*check_sum += hdmi_info.packet_raw_data.sb[byte_index];
/* one byte complement */
@@ -2425,21 +2402,6 @@ static void set_spd_info_packet(
*info_packet = stream->vrr_infopacket;
}
-static void set_dp_sdp_info_packet(
- struct dc_info_packet *info_packet,
- struct dc_stream_state *stream)
-{
- /* SPD info packet for custom sdp message */
-
- /* Return if false. If true,
- * set the corresponding bit in the info packet
- */
- if (!stream->dpsdp_infopacket.valid)
- return;
-
- *info_packet = stream->dpsdp_infopacket;
-}
-
static void set_hdr_static_info_packet(
struct dc_info_packet *info_packet,
struct dc_stream_state *stream)
@@ -2495,7 +2457,6 @@ void dc_resource_state_copy_construct(
if (cur_pipe->bottom_pipe)
cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
}
for (i = 0; i < dst_ctx->stream_count; i++) {
@@ -2536,7 +2497,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
info->spd.valid = false;
info->hdrsmd.valid = false;
info->vsc.valid = false;
- info->dpsdp.valid = false;
signal = pipe_ctx->stream->signal;
@@ -2556,8 +2516,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
set_spd_info_packet(&info->spd, pipe_ctx->stream);
set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream);
-
- set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream);
}
patch_gamut_packet_checksum(&info->gamut);
@@ -2644,6 +2602,10 @@ bool pipe_need_reprogram(
if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream))
return true;
+ if (false == pipe_ctx_old->stream->link->link_state_valid &&
+ false == pipe_ctx_old->stream->dpms_off)
+ return true;
+
return false;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 96e97d25d639..b723ffc8ea25 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -47,8 +47,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
if (dc_is_dvi_signal(stream->signal)) {
if (stream->ctx->dc->caps.dual_link_dvi &&
- (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK &&
- sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+ (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK &&
+ sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;
else
stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
@@ -371,42 +371,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
return 0;
}
-static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx,
- const uint8_t *custom_sdp_message,
- unsigned int sdp_message_size)
-{
- uint8_t i;
- struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
-
- /* set valid info */
- info->dpsdp.valid = true;
-
- /* set sdp message header */
- info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */
- info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */
- info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */
- info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */
-
- /* set sdp message data */
- for (i = 0; i < 32; i++)
- info->dpsdp.sb[i] = (custom_sdp_message[i+4]);
-
-}
-
-static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx)
-{
- struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
-
- /* in-valid info */
- info->dpsdp.valid = false;
-}
-
bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
const uint8_t *custom_sdp_message,
unsigned int sdp_message_size)
{
int i;
- struct dc *core_dc;
+ struct dc *dc;
struct resource_context *res_ctx;
if (stream == NULL) {
@@ -414,8 +384,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
return false;
}
- core_dc = stream->ctx->dc;
- res_ctx = &core_dc->current_state->res_ctx;
+ dc = stream->ctx->dc;
+ res_ctx = &dc->current_state->res_ctx;
for (i = 0; i < MAX_PIPES; i++) {
struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
@@ -423,11 +393,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
if (pipe_ctx->stream != stream)
continue;
- build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size);
-
- core_dc->hwss.update_info_frame(pipe_ctx);
+ if (dc->hwss.send_immediate_sdp_message != NULL)
+ dc->hwss.send_immediate_sdp_message(pipe_ctx,
+ custom_sdp_message,
+ sdp_message_size);
+ else
+ DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n",
+ __func__);
- invalid_dp_sdp_info_frame(pipe_ctx);
}
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 44e4b0465587..566111ff463e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
#include "inc/hw/dmcu.h"
#include "dml/display_mode_lib.h"
-#define DC_VER "3.2.27"
+#define DC_VER "3.2.31"
#define MAX_SURFACES 3
#define MAX_PLANES 6
@@ -205,6 +205,7 @@ struct dc_config {
bool disable_fractional_pwm;
bool allow_seamless_boot_optimization;
bool power_down_display_on_boot;
+ bool edp_not_connected;
};
enum visual_confirm {
@@ -540,12 +541,14 @@ struct dc_plane_status {
union surface_update_flags {
struct {
+ uint32_t addr_update:1;
/* Medium updates */
uint32_t dcc_change:1;
uint32_t color_space_change:1;
uint32_t horizontal_mirror_change:1;
uint32_t per_pixel_alpha_change:1;
uint32_t global_alpha_change:1;
+ uint32_t sdr_white_level:1;
uint32_t rotation_change:1;
uint32_t swizzle_change:1;
uint32_t scaling_change:1;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 5e6c5eff49cf..2d0acf109360 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -297,7 +297,7 @@ void generic_reg_wait(const struct dc_context *ctx,
int i;
/* something is terribly wrong if time out is > 200ms. (5Hz) */
- ASSERT(delay_between_poll_us * time_out_num_tries <= 200000);
+ ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000);
for (i = 0; i <= time_out_num_tries; i++) {
if (i) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index da55d623647a..c91b8aad78c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -534,6 +534,7 @@ enum dc_color_space {
COLOR_SPACE_DOLBYVISION,
COLOR_SPACE_APPCTRL,
COLOR_SPACE_CUSTOMPOINTS,
+ COLOR_SPACE_YCBCR709_BLACK,
};
enum dc_dither_option {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 7b9429e30d82..094009127e25 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -75,6 +75,7 @@ struct dc_link {
enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse */
bool is_hpd_filter_disabled;
bool dp_ss_off;
+ bool link_state_valid;
/* caps is the same as reported_link_cap. link_traing use
* reported_link_cap. Will clean up. TODO
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6c2a3d9a4c2e..92a670894c05 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -104,7 +104,7 @@ struct dc_context {
#define DC_MAX_EDID_BUFFER_SIZE 1024
-#define EDID_BLOCK_SIZE 128
+#define DC_EDID_BLOCK_SIZE 128
#define MAX_SURFACE_NUM 4
#define NUM_PIXEL_FORMATS 10
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index da96229db53a..2959c3c9390b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -473,6 +473,8 @@ void dce_abm_destroy(struct abm **abm)
{
struct dce_abm *abm_dce = TO_DCE_ABM(*abm);
+ abm_dce->base.funcs->set_abm_immediate_disable(*abm);
+
kfree(abm_dce);
*abm = NULL;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 963686380738..6b2e207777f0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -241,6 +241,7 @@ static enum dm_pp_clocks_state dce_get_required_clocks_state(
return low_req_clk;
}
+/* TODO: remove use the two broken down functions */
static int dce_set_clock(
struct clk_mgr *clk_mgr,
int requested_clk_khz)
@@ -336,6 +337,75 @@ int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz)
return actual_clock;
}
+int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz)
+{
+ struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr);
+ struct bp_set_dce_clock_parameters dce_clk_params;
+ struct dc_bios *bp = clk_mgr->ctx->dc_bios;
+ struct dc *core_dc = clk_mgr->ctx->dc;
+ struct dmcu *dmcu = core_dc->res_pool->dmcu;
+ int actual_clock = requested_clk_khz;
+ /* Prepare to program display clock*/
+ memset(&dce_clk_params, 0, sizeof(dce_clk_params));
+
+ /* Make sure requested clock isn't lower than minimum threshold*/
+ if (requested_clk_khz > 0)
+ requested_clk_khz = max(requested_clk_khz,
+ clk_mgr_dce->dentist_vco_freq_khz / 62);
+
+ dce_clk_params.target_clock_frequency = requested_clk_khz;
+ dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+ dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK;
+
+ bp->funcs->set_dce_clock(bp, &dce_clk_params);
+ actual_clock = dce_clk_params.target_clock_frequency;
+
+ /*
+ * from power down, we need mark the clock state as ClocksStateNominal
+ * from HWReset, so when resume we will call pplib voltage regulator.
+ */
+ if (requested_clk_khz == 0)
+ clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
+
+
+ if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+ if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+ if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
+ dmcu->funcs->set_psr_wait_loop(dmcu,
+ actual_clock / 1000 / 7);
+ }
+ }
+
+ clk_mgr_dce->dfs_bypass_disp_clk = actual_clock;
+ return actual_clock;
+
+}
+
+int dce112_set_dprefclk(struct clk_mgr *clk_mgr)
+{
+ struct bp_set_dce_clock_parameters dce_clk_params;
+ struct dc_bios *bp = clk_mgr->ctx->dc_bios;
+
+ memset(&dce_clk_params, 0, sizeof(dce_clk_params));
+
+ /*Program DP ref Clock*/
+ /*VBIOS will determine DPREFCLK frequency, so we don't set it*/
+ dce_clk_params.target_clock_frequency = 0;
+ dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+ dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
+ if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev))
+ dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
+ (dce_clk_params.pll_id ==
+ CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
+ else
+ dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
+
+ bp->funcs->set_dce_clock(bp, &dce_clk_params);
+
+ /* Returns the dp_refclk that was set */
+ return dce_clk_params.target_clock_frequency;
+}
+
static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce)
{
struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug;
@@ -782,22 +852,22 @@ static void dce12_update_clocks(struct clk_mgr *clk_mgr,
dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context);
}
-static const struct clk_mgr_funcs dce120_funcs = {
+static struct clk_mgr_funcs dce120_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = dce12_update_clocks
};
-static const struct clk_mgr_funcs dce112_funcs = {
+static struct clk_mgr_funcs dce112_funcs = {
.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
.update_clocks = dce112_update_clocks
};
-static const struct clk_mgr_funcs dce110_funcs = {
+static struct clk_mgr_funcs dce110_funcs = {
.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
.update_clocks = dce11_update_clocks,
};
-static const struct clk_mgr_funcs dce_funcs = {
+static struct clk_mgr_funcs dce_funcs = {
.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
.update_clocks = dce_update_clocks
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
index c8f8c442142a..cca0c95d8cc8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
@@ -39,6 +39,11 @@
#define CLK_COMMON_REG_LIST_DCN_BASE() \
SR(DENTIST_DISPCLK_CNTL)
+#define VBIOS_SMU_MSG_BOX_REG_LIST_RV() \
+ .MP1_SMN_C2PMSG_91 = mmMP1_SMN_C2PMSG_91, \
+ .MP1_SMN_C2PMSG_83 = mmMP1_SMN_C2PMSG_83, \
+ .MP1_SMN_C2PMSG_67 = mmMP1_SMN_C2PMSG_67
+
#define CLK_SF(reg_name, field_name, post_fix)\
.field_name = reg_name ## __ ## field_name ## post_fix
@@ -50,23 +55,39 @@
CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\
CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh)
+#define CLK_MASK_SH_LIST_RV1(mask_sh) \
+ CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
+ CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\
+ CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\
+ CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh),
+
+
#define CLK_REG_FIELD_LIST(type) \
type DPREFCLK_SRC_SEL; \
type DENTIST_DPREFCLK_WDIVIDER; \
type DENTIST_DISPCLK_WDIVIDER; \
type DENTIST_DISPCLK_CHG_DONE;
+#define VBIOS_SMU_REG_FIELD_LIST(type) \
+ type CONTENT;
+
struct clk_mgr_shift {
CLK_REG_FIELD_LIST(uint8_t)
+ VBIOS_SMU_REG_FIELD_LIST(uint32_t)
};
struct clk_mgr_mask {
CLK_REG_FIELD_LIST(uint32_t)
+ VBIOS_SMU_REG_FIELD_LIST(uint32_t)
};
struct clk_mgr_registers {
uint32_t DPREFCLK_CNTL;
uint32_t DENTIST_DISPCLK_CNTL;
+
+ uint32_t MP1_SMN_C2PMSG_67;
+ uint32_t MP1_SMN_C2PMSG_83;
+ uint32_t MP1_SMN_C2PMSG_91;
};
struct state_dependent_clocks {
@@ -168,6 +189,8 @@ void dce110_fill_display_configs(
struct dm_pp_display_configuration *pp_display_cfg);
int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz);
+int dce112_set_dispclk(struct clk_mgr *clk_mgr, int requested_clk_khz);
+int dce112_set_dprefclk(struct clk_mgr *clk_mgr);
struct clk_mgr *dce_clk_mgr_create(
struct dc_context *ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index f70437aae8e0..df422440845b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -183,8 +183,8 @@ static bool calculate_fb_and_fractional_fb_divider(
*RETURNS:
* It fills the PLLSettings structure with PLL Dividers values
* if calculated values are within required tolerance
-* It returns - true if eror is within tolerance
-* - false if eror is not within tolerance
+* It returns - true if error is within tolerance
+* - false if error is not within tolerance
*/
static bool calc_fb_divider_checking_tolerance(
struct calc_pll_clock_source *calc_pll_cs,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index cd26161bcc4d..526aab438374 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -268,6 +268,8 @@ static bool setup_engine(
struct dce_i2c_hw *dce_i2c_hw)
{
uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
+ /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
+ REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
if (dce_i2c_hw->setup_limit != 0)
i2c_setup_limit = dce_i2c_hw->setup_limit;
@@ -322,8 +324,6 @@ static void release_engine(
set_speed(dce_i2c_hw, dce_i2c_hw->original_speed);
- /* Release I2C */
- REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1);
/* Reset HW engine */
{
@@ -343,6 +343,9 @@ static void release_engine(
/* HW I2c engine - clock gating feature */
if (!dce_i2c_hw->engine_keep_power_up_count)
REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0);
+ /* Release I2C after reset, so HW or DMCU could use it */
+ REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1,
+ DC_I2C_SW_USE_I2C_REG_REQ, 0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index 575500755b2e..f718e3d396f2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -105,6 +105,7 @@ enum {
I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\
I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\
I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\
+ I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\
I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\
I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\
I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\
@@ -146,6 +147,7 @@ struct dce_i2c_shift {
uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
uint8_t DC_I2C_SW_DONE_USING_I2C_REG;
+ uint8_t DC_I2C_SW_USE_I2C_REG_REQ;
uint8_t DC_I2C_NO_QUEUED_SW_GO;
uint8_t DC_I2C_SW_PRIORITY;
uint8_t DC_I2C_SOFT_RESET;
@@ -184,6 +186,7 @@ struct dce_i2c_mask {
uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
uint32_t DC_I2C_SW_DONE_USING_I2C_REG;
+ uint32_t DC_I2C_SW_USE_I2C_REG_REQ;
uint32_t DC_I2C_NO_QUEUED_SW_GO;
uint32_t DC_I2C_SW_PRIORITY;
uint32_t DC_I2C_SOFT_RESET;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 14309fe6f2e6..61fe2596fdb3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -418,6 +418,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
break;
case COLOR_SPACE_YCBCR709:
case COLOR_SPACE_YCBCR709_LIMITED:
+ case COLOR_SPACE_YCBCR709_BLACK:
misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
misc1 = misc1 & ~0x80; /* bit7 = 0*/
dynamic_range_ycbcr = 1; /*bt709*/
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index e938bf9986d3..d7a531e9700f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -867,13 +867,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s
return DC_FAIL_SURFACE_VALIDATE;
}
+struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ int i;
+ int j = -1;
+ struct dc_link *link = stream->link;
+
+ for (i = 0; i < pool->stream_enc_count; i++) {
+ if (!res_ctx->is_stream_enc_acquired[i] &&
+ pool->stream_enc[i]) {
+ /* Store first available for MST second display
+ * in daisy chain use case
+ */
+ j = i;
+ if (pool->stream_enc[i]->id ==
+ link->link_enc->preferred_engine)
+ return pool->stream_enc[i];
+ }
+ }
+
+ /*
+ * below can happen in cases when stream encoder is acquired:
+ * 1) for second MST display in chain, so preferred engine already
+ * acquired;
+ * 2) for another link, which preferred engine already acquired by any
+ * MST configuration.
+ *
+ * If signal is of DP type and preferred engine not found, return last available
+ *
+ * TODO - This is just a patch up and a generic solution is
+ * required for non DP connectors.
+ */
+
+ if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
+ return pool->stream_enc[j];
+
+ return NULL;
+}
+
static const struct resource_funcs dce100_res_pool_funcs = {
.destroy = dce100_destroy_resource_pool,
.link_enc_create = dce100_link_encoder_create,
.validate_bandwidth = dce100_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce100_validate_global
+ .validate_global = dce100_validate_global,
+ .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
static bool construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
index 2f366d66635d..fecab7c560f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
@@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx(
struct dc_state *new_ctx,
struct dc_stream_state *dc_stream);
+struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
#endif /* DCE100_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 7ac50ab1b762..69f215967af3 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -242,6 +242,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params,
prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;
switch (plane_state->format) {
+ case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+ prescale_params->scale = 0x2082;
+ break;
case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
prescale_params->scale = 0x2020;
@@ -1296,6 +1299,11 @@ static enum dc_status dce110_enable_stream_timing(
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
&stream->timing,
+ 0,
+ 0,
+ 0,
+ 0,
+ pipe_ctx->stream->signal,
true);
}
@@ -1488,10 +1496,11 @@ static void disable_vga_and_power_gate_all_controllers(
}
}
-static struct dc_link *get_link_for_edp(struct dc *dc)
+static struct dc_link *get_edp_link(struct dc *dc)
{
int i;
+ // report any eDP links, even unconnected DDI's
for (i = 0; i < dc->link_count; i++) {
if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
return dc->links[i];
@@ -1499,23 +1508,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc)
return NULL;
}
-static struct dc_link *get_link_for_edp_to_turn_off(
+static struct dc_link *get_edp_link_with_sink(
struct dc *dc,
struct dc_state *context)
{
int i;
struct dc_link *link = NULL;
- /* check if eDP panel is suppose to be set mode, if yes, no need to disable */
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
- if (context->streams[i]->dpms_off == true)
- return context->streams[i]->sink->link;
- else
- return NULL;
- }
- }
-
/* check if there is an eDP panel not in use */
for (i = 0; i < dc->link_count; i++) {
if (dc->links[i]->local_sink &&
@@ -1538,59 +1537,53 @@ static struct dc_link *get_link_for_edp_to_turn_off(
void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
{
int i;
- struct dc_link *edp_link_to_turnoff = NULL;
- struct dc_link *edp_link = get_link_for_edp(dc);
- bool can_edp_fast_boot_optimize = false;
- bool apply_edp_fast_boot_optimization = false;
+ struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context);
+ struct dc_link *edp_link = get_edp_link(dc);
+ bool can_apply_edp_fast_boot = false;
bool can_apply_seamless_boot = false;
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->apply_seamless_boot_optimization) {
- can_apply_seamless_boot = true;
- break;
- }
- }
-
if (dc->hwss.init_pipes)
dc->hwss.init_pipes(dc, context);
- if (edp_link) {
- /* this seems to cause blank screens on DCE8 */
- if ((dc->ctx->dce_version == DCE_VERSION_8_0) ||
- (dc->ctx->dce_version == DCE_VERSION_8_1) ||
- (dc->ctx->dce_version == DCE_VERSION_8_3))
- can_edp_fast_boot_optimize = false;
- else
- can_edp_fast_boot_optimize =
- edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc);
+ // Check fastboot support, disable on DCE8 because of blank screens
+ if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 &&
+ dc->ctx->dce_version != DCE_VERSION_8_1 &&
+ dc->ctx->dce_version != DCE_VERSION_8_3) {
+
+ // enable fastboot if backend is enabled on eDP
+ if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) {
+ /* Find eDP stream and set optimization flag */
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
+ context->streams[i]->apply_edp_fast_boot_optimization = true;
+ can_apply_edp_fast_boot = true;
+ break;
+ }
+ }
+ }
}
- if (can_edp_fast_boot_optimize)
- edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context);
-
- /* if OS doesn't light up eDP and eDP link is available, we want to disable
- * If resume from S4/S5, should optimization.
- */
- if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) {
- /* Find eDP stream and set optimization flag */
- for (i = 0; i < context->stream_count; i++) {
- if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
- context->streams[i]->apply_edp_fast_boot_optimization = true;
- apply_edp_fast_boot_optimization = true;
- }
+ // Check seamless boot support
+ for (i = 0; i < context->stream_count; i++) {
+ if (context->streams[i]->apply_seamless_boot_optimization) {
+ can_apply_seamless_boot = true;
+ break;
}
}
- if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) {
- if (edp_link_to_turnoff) {
+ /* eDP should not have stream in resume from S4 and so even with VBios post
+ * it should get turned off
+ */
+ if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
+ if (edp_link_with_sink) {
/*turn off backlight before DP_blank and encoder powered down*/
- dc->hwss.edp_backlight_control(edp_link_to_turnoff, false);
+ dc->hwss.edp_backlight_control(edp_link_with_sink, false);
}
/*resume from S3, no vbios posting, no need to power down again*/
power_down_all_hw_blocks(dc);
disable_vga_and_power_gate_all_controllers(dc);
- if (edp_link_to_turnoff)
- dc->hwss.edp_power_control(edp_link_to_turnoff, false);
+ if (edp_link_with_sink)
+ dc->hwss.edp_power_control(edp_link_with_sink, false);
}
bios_set_scratch_acc_mode_change(dc->ctx->dc_bios);
}
@@ -2030,8 +2023,10 @@ enum dc_status dce110_apply_ctx_to_hw(
if (pipe_ctx->stream == NULL)
continue;
- if (pipe_ctx->stream == pipe_ctx_old->stream)
+ if (pipe_ctx->stream == pipe_ctx_old->stream &&
+ pipe_ctx->stream->link->link_state_valid) {
continue;
+ }
if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
continue;
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index dcd04e9ea76b..f982c8b196cf 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -1097,6 +1097,11 @@ static struct pipe_ctx *dce110_acquire_underlay(
pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg,
&stream->timing,
+ 0,
+ 0,
+ 0,
+ 0,
+ pipe_ctx->stream->signal,
false);
pipe_ctx->stream_res.tg->funcs->enable_advanced_request(
@@ -1129,6 +1134,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)
*pool = NULL;
}
+struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ int i;
+ int j = -1;
+ struct dc_link *link = stream->link;
+
+ for (i = 0; i < pool->stream_enc_count; i++) {
+ if (!res_ctx->is_stream_enc_acquired[i] &&
+ pool->stream_enc[i]) {
+ /* Store first available for MST second display
+ * in daisy chain use case
+ */
+ j = i;
+ if (pool->stream_enc[i]->id ==
+ link->link_enc->preferred_engine)
+ return pool->stream_enc[i];
+ }
+ }
+
+ /*
+ * For CZ and later, we can allow DIG FE and BE to differ for all display types
+ */
+
+ if (j >= 0)
+ return pool->stream_enc[j];
+
+ return NULL;
+}
+
static const struct resource_funcs dce110_res_pool_funcs = {
.destroy = dce110_destroy_resource_pool,
@@ -1137,7 +1174,8 @@ static const struct resource_funcs dce110_res_pool_funcs = {
.validate_plane = dce110_validate_plane,
.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
.add_stream_to_ctx = dce110_add_stream_to_ctx,
- .validate_global = dce110_validate_global
+ .validate_global = dce110_validate_global,
+ .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
};
static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
index e5f168c1f8c8..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
@@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool(
struct dc *dc,
struct hw_asic_id asic_id);
+struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
#endif /* __DC_RESOURCE_DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 1b2fe0df347f..5f7c2c5641c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
void dce110_tg_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios)
{
if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index 734d4965dab1..768ccf27ada9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
void dce110_tg_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios);
bool dce110_tg_is_blanked(struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index a3cef60380ed..a13a2f58944e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg,
static void dce110_timing_generator_v_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios)
{
if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index a480b15f6885..cdf759b0f5f9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -993,7 +993,8 @@ static const struct resource_funcs dce112_res_pool_funcs = {
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce112_add_stream_to_ctx,
- .validate_global = dce112_validate_global
+ .validate_global = dce112_validate_global,
+ .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
};
static void bw_calcs_data_update_from_pplib(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 6d49c7143c67..9e6a5d84b0a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -480,7 +480,7 @@ static const struct dc_debug_options debug_defaults = {
.disable_clock_gate = true,
};
-struct clock_source *dce120_clock_source_create(
+static struct clock_source *dce120_clock_source_create(
struct dc_context *ctx,
struct dc_bios *bios,
enum clock_source_id id,
@@ -503,14 +503,14 @@ struct clock_source *dce120_clock_source_create(
return NULL;
}
-void dce120_clock_source_destroy(struct clock_source **clk_src)
+static void dce120_clock_source_destroy(struct clock_source **clk_src)
{
kfree(TO_DCE110_CLK_SRC(*clk_src));
*clk_src = NULL;
}
-bool dce120_hw_sequencer_create(struct dc *dc)
+static bool dce120_hw_sequencer_create(struct dc *dc)
{
/* All registers used by dce11.2 match those in dce11 in offset and
* structure
@@ -837,7 +837,8 @@ static const struct resource_funcs dce120_res_pool_funcs = {
.link_enc_create = dce120_link_encoder_create,
.validate_bandwidth = dce112_validate_bandwidth,
.validate_plane = dce100_validate_plane,
- .add_stream_to_ctx = dce112_add_stream_to_ctx
+ .add_stream_to_ctx = dce112_add_stream_to_ctx,
+ .find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
};
static void bw_calcs_data_update_from_pplib(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 04b866f0fa1f..098e56962f2a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg,
CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr);
}
-void dce120_tg_program_timing(struct timing_generator *tg,
+static void dce120_tg_program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios)
{
if (use_vbios)
@@ -1109,6 +1114,92 @@ static bool dce120_arm_vert_intr(
return true;
}
+
+static bool dce120_is_tg_enabled(struct timing_generator *tg)
+{
+ struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+ uint32_t value, field;
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL,
+ tg110->offsets.crtc);
+ field = get_reg_field_value(value, CRTC0_CRTC_CONTROL,
+ CRTC_CURRENT_MASTER_EN_STATE);
+
+ return field == 1;
+}
+
+static bool dce120_configure_crc(struct timing_generator *tg,
+ const struct crc_params *params)
+{
+ struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+
+ /* Cannot configure crc on a CRTC that is disabled */
+ if (!dce120_is_tg_enabled(tg))
+ return false;
+
+ /* First, disable CRC before we configure it. */
+ dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+ tg110->offsets.crtc, 0);
+
+ if (!params->enable)
+ return true;
+
+ /* Program frame boundaries */
+ /* Window A x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+ CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+ CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+ /* Window A y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+ CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+ CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+ /* Window B x axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+ CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+ CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+ /* Window B y axis start and end. */
+ CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+ CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+ CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+ /* Set crc mode and selection, and enable. Only using CRC0*/
+ CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+ CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
+ CRTC_CRC0_SELECT, params->selection,
+ CRTC_CRC_EN, 1);
+
+ return true;
+}
+
+static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
+ uint32_t *g_y, uint32_t *b_cb)
+{
+ struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+ uint32_t value, field;
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+ tg110->offsets.crtc);
+ field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN);
+
+ /* Early return if CRC is not enabled for this CRTC */
+ if (!field)
+ return false;
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+ tg110->offsets.crtc);
+ *r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+ *g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+
+ value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+ tg110->offsets.crtc);
+ *b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+
+ return true;
+}
+
static const struct timing_generator_funcs dce120_tg_funcs = {
.validate_timing = dce120_tg_validate_timing,
.program_timing = dce120_tg_program_timing,
@@ -1140,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
.set_static_screen_control = dce120_timing_generator_set_static_screen_control,
.set_test_pattern = dce120_timing_generator_set_test_pattern,
.arm_vert_intr = dce120_arm_vert_intr,
+ .is_tg_enabled = dce120_is_tg_enabled,
+ .configure_crc = dce120_configure_crc,
+ .get_crc = dce120_get_crc,
};
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 27d0cc394963..2c21135a8510 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -880,7 +880,8 @@ static const struct resource_funcs dce80_res_pool_funcs = {
.validate_bandwidth = dce80_validate_bandwidth,
.validate_plane = dce100_validate_plane,
.add_stream_to_ctx = dce100_add_stream_to_ctx,
- .validate_global = dce80_validate_global
+ .validate_global = dce80_validate_global,
+ .find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
};
static bool dce80_construct(
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index 8b5ce557ee71..397e7f94e1e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)
static void program_timing(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios)
{
if (!use_vbios)
program_pix_dur(tg, timing->pix_clk_100hz);
- dce110_tg_program_timing(tg, timing, use_vbios);
+ dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
}
static void dce80_timing_generator_enable_advanced_request(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
index 2b2de1d913c9..9f2ffce10e12 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
@@ -27,6 +27,7 @@
#include "reg_helper.h"
#include "core_types.h"
+#include "dal_asic_id.h"
#define TO_DCE_CLK_MGR(clocks)\
container_of(clocks, struct dce_clk_mgr, base)
@@ -91,13 +92,18 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl
static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks)
{
+ int i;
struct dc *dc = clk_mgr->ctx->dc;
int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks);
bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
- int i;
/* set disp clk to dpp clk threshold */
- dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
+
+ if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
+ clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold);
+ clk_mgr->funcs->set_dprefclk(clk_mgr);
+ } else
+ dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
/* update request dpp clk division option */
for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -113,8 +119,13 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo
}
/* If target clk not same as dppclk threshold, set to target clock */
- if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz)
- dce112_set_clock(clk_mgr, new_clocks->dispclk_khz);
+ if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) {
+ if (clk_mgr->funcs->set_dispclk && clk_mgr->funcs->set_dprefclk) {
+ clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz);
+ clk_mgr->funcs->set_dprefclk(clk_mgr);
+ } else
+ dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
+ }
clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
@@ -242,7 +253,62 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
}
}
}
-static const struct clk_mgr_funcs dcn1_funcs = {
+
+#define VBIOSSMC_MSG_SetDispclkFreq 0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq 0x5
+
+int dcn10_set_dispclk(struct clk_mgr *clk_mgr_base, int requested_dispclk_khz)
+{
+ int actual_dispclk_set_khz = -1;
+ struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
+
+ /* First clear response register */
+ //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_91, 0);
+ REG_WRITE(MP1_SMN_C2PMSG_91, 0);
+
+ /* Set the parameter register for the SMU message, unit is Mhz */
+ //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
+ REG_WRITE(MP1_SMN_C2PMSG_83, requested_dispclk_khz / 1000);
+
+ /* Trigger the message transaction by writing the message ID */
+ //dm_write_reg(ctx, mmMP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
+ REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDispclkFreq);
+
+ REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
+
+ /* Actual dispclk set is returned in the parameter register */
+ actual_dispclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
+
+ return actual_dispclk_set_khz;
+
+}
+
+int dcn10_set_dprefclk(struct clk_mgr *clk_mgr_base)
+{
+ int actual_dprefclk_set_khz = -1;
+ struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr_base);
+
+ REG_WRITE(MP1_SMN_C2PMSG_91, 0);
+
+ /* Set the parameter register for the SMU message */
+ REG_WRITE(MP1_SMN_C2PMSG_83, clk_mgr_dce->dprefclk_khz / 1000);
+
+ /* Trigger the message transaction by writing the message ID */
+ REG_WRITE(MP1_SMN_C2PMSG_67, VBIOSSMC_MSG_SetDprefclkFreq);
+
+ /* Wait for SMU response */
+ REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
+
+ actual_dprefclk_set_khz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
+
+ return actual_dprefclk_set_khz;
+}
+
+int (*set_dispclk)(struct pp_smu *pp_smu, int dispclk);
+
+int (*set_dprefclk)(struct pp_smu *pp_smu);
+
+static struct clk_mgr_funcs dcn1_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = dcn1_update_clocks
};
@@ -266,8 +332,8 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
clk_mgr_dce->dprefclk_ss_percentage = 0;
clk_mgr_dce->dprefclk_ss_divider = 1000;
clk_mgr_dce->ss_on_dprefclk = false;
-
clk_mgr_dce->dprefclk_khz = 600000;
+
if (bp->integrated_info)
clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0db2a6e96fc0..bf978831bb0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -263,20 +263,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub)
DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1);
}
-void hubbub1_program_watermarks(
+void hubbub1_program_urgent_watermarks(
struct hubbub *hubbub,
struct dcn_watermark_set *watermarks,
unsigned int refclk_mhz,
bool safe_to_lower)
{
struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
- /*
- * Need to clamp to max of the register values (i.e. no wrap)
- * for dcn1, all wm registers are 21-bit wide
- */
uint32_t prog_wm_value;
-
/* Repeat for water mark set A, B, C and D. */
/* clock state A */
if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) {
@@ -291,60 +286,14 @@ void hubbub1_program_watermarks(
watermarks->a.urgent_ns, prog_wm_value);
}
- if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) {
- if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) {
- hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns;
- prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
- refclk_mhz, 0x1fffff);
- REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->a.pte_meta_urgent_ns, prog_wm_value);
- }
- }
-
- if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) {
- if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
- > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
- hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
- watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
- DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
- }
-
- if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
- > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
- hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
- watermarks->a.cstate_pstate.cstate_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->a.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
- DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
- }
- }
-
- if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
- > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
- hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
- watermarks->a.cstate_pstate.pstate_change_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->a.cstate_pstate.pstate_change_ns,
+ if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) {
+ hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
- "HW register value = 0x%x\n\n",
- watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
+ REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->a.pte_meta_urgent_ns, prog_wm_value);
}
/* clock state B */
@@ -360,60 +309,14 @@ void hubbub1_program_watermarks(
watermarks->b.urgent_ns, prog_wm_value);
}
- if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) {
- if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) {
- hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns;
- prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
- refclk_mhz, 0x1fffff);
- REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->b.pte_meta_urgent_ns, prog_wm_value);
- }
- }
-
- if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) {
- if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
- > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
- hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
- watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
- DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
- }
-
- if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
- > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
- hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
- watermarks->b.cstate_pstate.cstate_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->b.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
- DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
- }
- }
-
- if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
- > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
- hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
- watermarks->b.cstate_pstate.pstate_change_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->b.cstate_pstate.pstate_change_ns,
+ if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) {
+ hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
- "HW register value = 0x%x\n\n",
- watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
+ REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->b.pte_meta_urgent_ns, prog_wm_value);
}
/* clock state C */
@@ -429,60 +332,14 @@ void hubbub1_program_watermarks(
watermarks->c.urgent_ns, prog_wm_value);
}
- if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) {
- if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) {
- hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns;
- prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
- refclk_mhz, 0x1fffff);
- REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->c.pte_meta_urgent_ns, prog_wm_value);
- }
- }
-
- if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) {
- if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
- > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
- hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
- watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
- DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
- }
-
- if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
- > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
- hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
- watermarks->c.cstate_pstate.cstate_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->c.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
- DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
- }
- }
-
- if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
- > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
- hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
- watermarks->c.cstate_pstate.pstate_change_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->c.cstate_pstate.pstate_change_ns,
+ if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) {
+ hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
- DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
- "HW register value = 0x%x\n\n",
- watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
+ REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->c.pte_meta_urgent_ns, prog_wm_value);
}
/* clock state D */
@@ -498,48 +355,199 @@ void hubbub1_program_watermarks(
watermarks->d.urgent_ns, prog_wm_value);
}
- if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) {
- if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) {
- hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns;
- prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns,
- refclk_mhz, 0x1fffff);
- REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->d.pte_meta_urgent_ns, prog_wm_value);
- }
+ if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) {
+ hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns;
+ prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns,
+ refclk_mhz, 0x1fffff);
+ REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->d.pte_meta_urgent_ns, prog_wm_value);
}
+}
- if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) {
- if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns
- > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) {
- hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
- watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
- DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
- }
+void hubbub1_program_stutter_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+ uint32_t prog_wm_value;
- if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns
- > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) {
- hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns =
- watermarks->d.cstate_pstate.cstate_exit_ns;
- prog_wm_value = convert_and_clamp(
- watermarks->d.cstate_pstate.cstate_exit_ns,
- refclk_mhz, 0x1fffff);
- REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
- DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
- DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
- "HW register value = 0x%x\n",
- watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
- }
+ /* clock state A */
+ if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
+ > hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
+ hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+ }
+
+ if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
+ > hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
+ hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
+ watermarks->a.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.cstate_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
+ }
+
+ /* clock state B */
+ if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
+ > hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
+ hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
}
+ if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
+ > hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
+ hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
+ watermarks->b.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.cstate_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
+ }
+
+ /* clock state C */
+ if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
+ > hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
+ hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+ }
+
+ if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
+ > hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
+ hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
+ watermarks->c.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.cstate_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
+ }
+
+ /* clock state D */
+ if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns
+ > hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) {
+ hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+ }
+
+ if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns
+ > hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) {
+ hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns =
+ watermarks->d.cstate_pstate.cstate_exit_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->d.cstate_pstate.cstate_exit_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
+ DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
+ "HW register value = 0x%x\n",
+ watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
+ }
+
+}
+
+void hubbub1_program_pstate_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+ uint32_t prog_wm_value;
+
+ /* clock state A */
+ if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
+ > hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
+ hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
+ watermarks->a.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->a.cstate_pstate.pstate_change_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
+ "HW register value = 0x%x\n\n",
+ watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
+ }
+
+ /* clock state B */
+ if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
+ > hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
+ hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
+ watermarks->b.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->b.cstate_pstate.pstate_change_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
+ "HW register value = 0x%x\n\n",
+ watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
+ }
+
+ /* clock state C */
+ if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
+ > hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
+ hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
+ watermarks->c.cstate_pstate.pstate_change_ns;
+ prog_wm_value = convert_and_clamp(
+ watermarks->c.cstate_pstate.pstate_change_ns,
+ refclk_mhz, 0x1fffff);
+ REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
+ DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
+ DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
+ "HW register value = 0x%x\n\n",
+ watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
+ }
+
+ /* clock state D */
if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns
> hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) {
hubbub1->watermarks.d.cstate_pstate.pstate_change_ns =
@@ -553,6 +561,22 @@ void hubbub1_program_watermarks(
"HW register value = 0x%x\n\n",
watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value);
}
+}
+
+void hubbub1_program_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower)
+{
+ struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+ /*
+ * Need to clamp to max of the register values (i.e. no wrap)
+ * for dcn1, all wm registers are 21-bit wide
+ */
+ hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+ hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+ hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL,
DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
@@ -903,9 +927,7 @@ void hubbub1_construct(struct hubbub *hubbub,
hubbub1->masks = hubbub_mask;
hubbub1->debug_test_index_pstate = 0x7;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (ctx->dce_version == DCN_VERSION_1_01)
hubbub1->debug_test_index_pstate = 0xB;
-#endif
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index 85811b24a497..7c2559c9ae23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub,
const struct dcn_hubbub_shift *hubbub_shift,
const struct dcn_hubbub_mask *hubbub_mask);
+void hubbub1_program_urgent_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+void hubbub1_program_stutter_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+void hubbub1_program_pstate_watermarks(
+ struct hubbub *hubbub,
+ struct dcn_watermark_set *watermarks,
+ unsigned int refclk_mhz,
+ bool safe_to_lower);
+
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 33d311cea28c..66bb0e7db25c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -658,16 +658,15 @@ static enum dc_status dcn10_enable_stream_timing(
BREAK_TO_DEBUGGER();
return DC_ERROR_UNEXPECTED;
}
- pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
- pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
- pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
- pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
-
- pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal;
pipe_ctx->stream_res.tg->funcs->program_timing(
pipe_ctx->stream_res.tg,
&stream->timing,
+ pipe_ctx->pipe_dlg_param.vready_offset,
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width,
+ pipe_ctx->stream->signal,
true);
#if 0 /* move to after enable_crtc */
@@ -1756,7 +1755,7 @@ static void dcn10_program_output_csc(struct dc *dc,
bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
{
- if (pipe_ctx->plane_state->visible)
+ if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
return true;
if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
return true;
@@ -1765,7 +1764,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
{
- if (pipe_ctx->plane_state->visible)
+ if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
return true;
if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
return true;
@@ -1774,7 +1773,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
{
- if (pipe_ctx->plane_state->visible)
+ if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
return true;
if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
return true;
@@ -1920,7 +1919,7 @@ static uint16_t fixed_point_to_int_frac(
return result;
}
-void build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
+void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
const struct dc_plane_state *plane_state)
{
if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN
@@ -1953,7 +1952,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
plane_state->color_space);
//set scale and bias registers
- build_prescale_params(&bns_params, plane_state);
+ dcn10_build_prescale_params(&bns_params, plane_state);
if (dpp->funcs->dpp_program_bias_and_scale)
dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
}
@@ -2279,14 +2278,15 @@ static void program_all_pipe_in_tree(
if (pipe_ctx->top_pipe == NULL) {
bool blank = !is_pipe_tree_visible(pipe_ctx);
- pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
- pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
- pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
- pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
- pipe_ctx->stream_res.tg->dlg_otg_param.signal = pipe_ctx->stream->signal;
-
pipe_ctx->stream_res.tg->funcs->program_global_sync(
- pipe_ctx->stream_res.tg);
+ pipe_ctx->stream_res.tg,
+ pipe_ctx->pipe_dlg_param.vready_offset,
+ pipe_ctx->pipe_dlg_param.vstartup_start,
+ pipe_ctx->pipe_dlg_param.vupdate_offset,
+ pipe_ctx->pipe_dlg_param.vupdate_width);
+
+ pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+ pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
@@ -2644,9 +2644,6 @@ static void dcn10_wait_for_mpcc_disconnect(
res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
hubp->funcs->set_blank(hubp, true);
- /*DC_LOG_ERROR(dc->ctx->logger,
- "[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n",
- i);*/
}
}
@@ -2790,7 +2787,6 @@ static void apply_front_porch_workaround(
int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
{
- struct timing_generator *optc = pipe_ctx->stream_res.tg;
const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
struct dc_crtc_timing patched_crtc_timing;
int vesa_sync_start;
@@ -2813,7 +2809,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
* interlace_factor;
vertical_line_start = asic_blank_end -
- optc->dlg_otg_param.vstartup_start + 1;
+ pipe_ctx->pipe_dlg_param.vstartup_start + 1;
return vertical_line_start;
}
@@ -2961,6 +2957,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
}
}
+static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
+ const uint8_t *custom_sdp_message,
+ unsigned int sdp_message_size)
+{
+ if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+ pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message(
+ pipe_ctx->stream_res.stream_enc,
+ custom_sdp_message,
+ sdp_message_size);
+ }
+}
+
static const struct hw_sequencer_funcs dcn10_funcs = {
.program_gamut_remap = program_gamut_remap,
.init_hw = dcn10_init_hw,
@@ -2980,6 +2988,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.enable_timing_synchronization = dcn10_enable_timing_synchronization,
.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
.update_info_frame = dce110_update_info_frame,
+ .send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
.enable_stream = dce110_enable_stream,
.disable_stream = dce110_disable_stream,
.unblank_stream = dcn10_unblank_stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 4b3b27a5d23b..ef94d6b15843 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream(
int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
+void dcn10_build_prescale_params(struct dc_bias_and_scale *bias_and_scale,
+ const struct dc_plane_state *plane_state);
void lock_all_pipes(struct dc *dc,
struct dc_state *context,
bool lock);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index 0126a44ba012..e25ae43f8d32 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -726,6 +726,8 @@ void dcn10_link_encoder_construct(
enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
bp_cap_info.DP_HBR3_EN;
enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+ enc10->base.features.flags.bits.DP_IS_USB_C =
+ bp_cap_info.DP_IS_USB_C;
} else {
DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
__func__,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 0345d51e9d6f..533b0f3cf6c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -46,9 +46,7 @@
* This is a workaround for a bug that has existed since R5xx and has not been
* fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
*/
-static void optc1_apply_front_porch_workaround(
- struct timing_generator *optc,
- struct dc_crtc_timing *timing)
+static void apply_front_porch_workaround(struct dc_crtc_timing *timing)
{
if (timing->flags.INTERLACE == 1) {
if (timing->v_front_porch < 2)
@@ -60,24 +58,33 @@ static void optc1_apply_front_porch_workaround(
}
void optc1_program_global_sync(
- struct timing_generator *optc)
+ struct timing_generator *optc,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
- if (optc->dlg_otg_param.vstartup_start == 0) {
+ optc1->vready_offset = vready_offset;
+ optc1->vstartup_start = vstartup_start;
+ optc1->vupdate_offset = vupdate_offset;
+ optc1->vupdate_width = vupdate_width;
+
+ if (optc1->vstartup_start == 0) {
BREAK_TO_DEBUGGER();
return;
}
REG_SET(OTG_VSTARTUP_PARAM, 0,
- VSTARTUP_START, optc->dlg_otg_param.vstartup_start);
+ VSTARTUP_START, optc1->vstartup_start);
REG_SET_2(OTG_VUPDATE_PARAM, 0,
- VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset,
- VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width);
+ VUPDATE_OFFSET, optc1->vupdate_offset,
+ VUPDATE_WIDTH, optc1->vupdate_width);
REG_SET(OTG_VREADY_PARAM, 0,
- VREADY_OFFSET, optc->dlg_otg_param.vready_offset);
+ VREADY_OFFSET, optc1->vready_offset);
}
static void optc1_disable_stereo(struct timing_generator *optc)
@@ -132,25 +139,32 @@ void optc1_setup_vertical_interrupt2(
void optc1_program_timing(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios)
{
struct dc_crtc_timing patched_crtc_timing;
- uint32_t vesa_sync_start;
uint32_t asic_blank_end;
uint32_t asic_blank_start;
uint32_t v_total;
uint32_t v_sync_end;
- uint32_t v_init, v_fp2;
uint32_t h_sync_polarity, v_sync_polarity;
uint32_t start_point = 0;
uint32_t field_num = 0;
uint32_t h_div_2;
- int32_t vertical_line_start;
struct optc *optc1 = DCN10TG_FROM_TG(optc);
+ optc1->signal = signal;
+ optc1->vready_offset = vready_offset;
+ optc1->vstartup_start = vstartup_start;
+ optc1->vupdate_offset = vupdate_offset;
+ optc1->vupdate_width = vupdate_width;
patched_crtc_timing = *dc_crtc_timing;
- optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
+ apply_front_porch_workaround(&patched_crtc_timing);
/* Load horizontal timing */
@@ -163,24 +177,16 @@ void optc1_program_timing(
OTG_H_SYNC_A_START, 0,
OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width);
- /* asic_h_blank_end = HsyncWidth + HbackPorch =
- * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart -
- * vesa.h_left_border
- */
- vesa_sync_start = patched_crtc_timing.h_addressable +
- patched_crtc_timing.h_border_right +
+ /* blank_start = line end - front porch */
+ asic_blank_start = patched_crtc_timing.h_total -
patched_crtc_timing.h_front_porch;
- asic_blank_end = patched_crtc_timing.h_total -
- vesa_sync_start -
+ /* blank_end = blank_start - active */
+ asic_blank_end = asic_blank_start -
+ patched_crtc_timing.h_border_right -
+ patched_crtc_timing.h_addressable -
patched_crtc_timing.h_border_left;
- /* h_blank_start = v_blank_end + v_active */
- asic_blank_start = asic_blank_end +
- patched_crtc_timing.h_border_left +
- patched_crtc_timing.h_addressable +
- patched_crtc_timing.h_border_right;
-
REG_UPDATE_2(OTG_H_BLANK_START_END,
OTG_H_BLANK_START, asic_blank_start,
OTG_H_BLANK_END, asic_blank_end);
@@ -212,24 +218,15 @@ void optc1_program_timing(
OTG_V_SYNC_A_START, 0,
OTG_V_SYNC_A_END, v_sync_end);
- vesa_sync_start = patched_crtc_timing.v_addressable +
- patched_crtc_timing.v_border_bottom +
+ /* blank_start = frame end - front porch */
+ asic_blank_start = patched_crtc_timing.v_total -
patched_crtc_timing.v_front_porch;
- asic_blank_end = (patched_crtc_timing.v_total -
- vesa_sync_start -
- patched_crtc_timing.v_border_top);
-
- /* v_blank_start = v_blank_end + v_active */
- asic_blank_start = asic_blank_end +
- (patched_crtc_timing.v_border_top +
- patched_crtc_timing.v_addressable +
- patched_crtc_timing.v_border_bottom);
-
- vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
- v_fp2 = 0;
- if (vertical_line_start < 0)
- v_fp2 = -vertical_line_start;
+ /* blank_end = blank_start - active */
+ asic_blank_end = asic_blank_start -
+ patched_crtc_timing.v_border_bottom -
+ patched_crtc_timing.v_addressable -
+ patched_crtc_timing.v_border_top;
REG_UPDATE_2(OTG_V_BLANK_START_END,
OTG_V_BLANK_START, asic_blank_start,
@@ -242,10 +239,9 @@ void optc1_program_timing(
REG_UPDATE(OTG_V_SYNC_A_CNTL,
OTG_V_SYNC_A_POL, v_sync_polarity);
- v_init = asic_blank_start;
- if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT ||
- optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
- optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) {
+ if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ optc1->signal == SIGNAL_TYPE_EDP) {
start_point = 1;
if (patched_crtc_timing.flags.INTERLACE == 1)
field_num = 1;
@@ -253,13 +249,10 @@ void optc1_program_timing(
/* Interlace */
if (REG(OTG_INTERLACE_CONTROL)) {
- if (patched_crtc_timing.flags.INTERLACE == 1) {
+ if (patched_crtc_timing.flags.INTERLACE == 1)
REG_UPDATE(OTG_INTERLACE_CONTROL,
OTG_INTERLACE_ENABLE, 1);
- v_init = v_init / 2;
- if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end)
- v_fp2 = v_fp2 / 2;
- } else
+ else
REG_UPDATE(OTG_INTERLACE_CONTROL,
OTG_INTERLACE_ENABLE, 0);
}
@@ -268,16 +261,18 @@ void optc1_program_timing(
REG_UPDATE(CONTROL,
VTG0_ENABLE, 0);
- REG_UPDATE_2(CONTROL,
- VTG0_FP2, v_fp2,
- VTG0_VCOUNT_INIT, v_init);
-
/* original code is using VTG offset to address OTG reg, seems wrong */
REG_UPDATE_2(OTG_CONTROL,
OTG_START_POINT_CNTL, start_point,
OTG_FIELD_NUMBER_CNTL, field_num);
- optc1_program_global_sync(optc);
+ optc->funcs->program_global_sync(optc,
+ vready_offset,
+ vstartup_start,
+ vupdate_offset,
+ vupdate_width);
+
+ optc->funcs->set_vtg_params(optc, dc_crtc_timing);
/* TODO
* patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1
@@ -296,6 +291,48 @@ void optc1_program_timing(
}
+void optc1_set_vtg_params(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing)
+{
+ struct dc_crtc_timing patched_crtc_timing;
+ uint32_t asic_blank_end;
+ uint32_t v_init;
+ uint32_t v_fp2 = 0;
+ int32_t vertical_line_start;
+
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ patched_crtc_timing = *dc_crtc_timing;
+ apply_front_porch_workaround(&patched_crtc_timing);
+
+ /* VCOUNT_INIT is the start of blank */
+ v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch;
+
+ /* end of blank = v_init - active */
+ asic_blank_end = v_init -
+ patched_crtc_timing.v_border_bottom -
+ patched_crtc_timing.v_addressable -
+ patched_crtc_timing.v_border_top;
+
+ /* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */
+ vertical_line_start = asic_blank_end - optc1->vstartup_start + 1;
+ if (vertical_line_start < 0)
+ v_fp2 = -vertical_line_start;
+
+ /* Interlace */
+ if (REG(OTG_INTERLACE_CONTROL)) {
+ if (patched_crtc_timing.flags.INTERLACE == 1) {
+ v_init = v_init / 2;
+ if ((optc1->vstartup_start/2)*2 > asic_blank_end)
+ v_fp2 = v_fp2 / 2;
+ }
+ }
+
+ REG_UPDATE_2(CONTROL,
+ VTG0_FP2, v_fp2,
+ VTG0_VCOUNT_INIT, v_init);
+}
+
void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -1420,6 +1457,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
.clear_optc_underflow = optc1_clear_optc_underflow,
.get_crc = optc1_get_crc,
.configure_crc = optc1_configure_crc,
+ .set_vtg_params = optc1_set_vtg_params,
};
void dcn10_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 4eb9a898c237..651b8caa4b9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -446,6 +446,12 @@ struct optc {
uint32_t min_v_sync_width;
uint32_t min_v_blank;
uint32_t min_v_blank_interlace;
+
+ int vstartup_start;
+ int vupdate_offset;
+ int vupdate_width;
+ int vready_offset;
+ enum signal_type signal;
};
void dcn10_timing_generator_init(struct optc *optc);
@@ -481,6 +487,11 @@ bool optc1_validate_timing(
void optc1_program_timing(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
bool use_vbios);
void optc1_setup_vertical_interrupt0(
@@ -495,7 +506,11 @@ void optc1_setup_vertical_interrupt2(
uint32_t start_line);
void optc1_program_global_sync(
- struct timing_generator *optc);
+ struct timing_generator *optc,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width);
bool optc1_disable_crtc(struct timing_generator *optc);
@@ -582,4 +597,7 @@ bool optc1_get_crc(struct timing_generator *optc,
bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
+void optc1_set_vtg_params(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing);
+
#endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 7eccb54c421d..bfddd51294a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -29,7 +29,6 @@
#include "resource.h"
#include "include/irq_service_interface.h"
#include "dcn10_resource.h"
-
#include "dcn10_ipp.h"
#include "dcn10_mpc.h"
#include "irq/dcn10/irq_service_dcn10.h"
@@ -153,9 +152,7 @@ enum dcn10_clk_src_array_id {
DCN10_CLK_SRC_PLL2,
DCN10_CLK_SRC_PLL3,
DCN10_CLK_SRC_TOTAL,
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3
-#endif
};
/* begin *********************
@@ -445,7 +442,6 @@ static const struct bios_registers bios_regs = {
HUBP_REG_LIST_DCN10(id)\
}
-
static const struct dcn_mi_registers hubp_regs[] = {
hubp_regs(0),
hubp_regs(1),
@@ -461,7 +457,6 @@ static const struct dcn_mi_mask hubp_mask = {
HUBP_MASK_SH_LIST_DCN10(_MASK)
};
-
static const struct dcn_hubbub_registers hubbub_reg = {
HUBBUB_REG_LIST_DCN10(0)
};
@@ -494,6 +489,27 @@ static const struct dce110_clk_src_mask cs_mask = {
CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK)
};
+
+#define mmMP1_SMN_C2PMSG_91 0x1629B
+#define mmMP1_SMN_C2PMSG_83 0x16293
+#define mmMP1_SMN_C2PMSG_67 0x16283
+
+#define MP1_SMN_C2PMSG_91__CONTENT_MASK 0xffffffffL
+#define MP1_SMN_C2PMSG_83__CONTENT_MASK 0xffffffffL
+#define MP1_SMN_C2PMSG_67__CONTENT_MASK 0xffffffffL
+#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT 0x00000000
+#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT 0x00000000
+#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT 0x00000000
+
+
+static const struct clk_mgr_shift clk_mgr_shift = {
+ CLK_MASK_SH_LIST_RV1(__SHIFT)
+};
+
+static const struct clk_mgr_mask clk_mgr_mask = {
+ CLK_MASK_SH_LIST_RV1(_MASK)
+};
+
static const struct resource_caps res_cap = {
.num_timing_generator = 4,
.num_opp = 4,
@@ -504,7 +520,6 @@ static const struct resource_caps res_cap = {
.num_ddc = 4,
};
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
static const struct resource_caps rv2_res_cap = {
.num_timing_generator = 3,
.num_opp = 3,
@@ -514,7 +529,6 @@ static const struct resource_caps rv2_res_cap = {
.num_pll = 3,
.num_ddc = 3,
};
-#endif
static const struct dc_plane_cap plane_cap = {
.type = DC_PLANE_TYPE_DCN_UNIVERSAL,
@@ -1217,6 +1231,38 @@ static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan
return result;
}
+struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream)
+{
+ int i;
+ int j = -1;
+ struct dc_link *link = stream->link;
+
+ for (i = 0; i < pool->stream_enc_count; i++) {
+ if (!res_ctx->is_stream_enc_acquired[i] &&
+ pool->stream_enc[i]) {
+ /* Store first available for MST second display
+ * in daisy chain use case
+ */
+ j = i;
+ if (pool->stream_enc[i]->id ==
+ link->link_enc->preferred_engine)
+ return pool->stream_enc[i];
+ }
+ }
+
+ /*
+ * For CZ and later, we can allow DIG FE and BE to differ for all display types
+ */
+
+ if (j >= 0)
+ return pool->stream_enc[j];
+
+ return NULL;
+}
+
static const struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn10_get_dcc_compression_cap
};
@@ -1229,7 +1275,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {
.validate_plane = dcn10_validate_plane,
.validate_global = dcn10_validate_global,
.add_stream_to_ctx = dcn10_add_stream_to_ctx,
- .get_default_swizzle_mode = dcn10_get_default_swizzle_mode
+ .get_default_swizzle_mode = dcn10_get_default_swizzle_mode,
+ .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
};
static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1252,11 +1299,9 @@ static bool construct(
ctx->dc_bios->regs = &bios_regs;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (ctx->dce_version == DCN_VERSION_1_01)
pool->base.res_cap = &rv2_res_cap;
else
-#endif
pool->base.res_cap = &res_cap;
pool->base.funcs = &dcn10_res_pool_funcs;
@@ -1273,10 +1318,8 @@ static bool construct(
/* max pipe num for ASIC before check pipe fuses */
pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (dc->ctx->dce_version == DCN_VERSION_1_01)
pool->base.pipe_count = 3;
-#endif
dc->caps.max_video_width = 3840;
dc->caps.max_downscale_ratio = 200;
dc->caps.i2c_speed_in_khz = 100;
@@ -1309,26 +1352,17 @@ static bool construct(
CLOCK_SOURCE_COMBO_PHY_PLL2,
&clk_src_regs[2], false);
-#ifdef CONFIG_DRM_AMD_DC_DCN1_01
if (dc->ctx->dce_version == DCN_VERSION_1_0) {
pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
dcn10_clock_source_create(ctx, ctx->dc_bios,
CLOCK_SOURCE_COMBO_PHY_PLL3,
&clk_src_regs[3], false);
}
-#else
- pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
- dcn10_clock_source_create(ctx, ctx->dc_bios,
- CLOCK_SOURCE_COMBO_PHY_PLL3,
- &clk_src_regs[3], false);
-#endif
pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (dc->ctx->dce_version == DCN_VERSION_1_01)
pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL;
-#endif
pool->base.dp_clock_source =
dcn10_clock_source_create(ctx, ctx->dc_bios,
@@ -1343,12 +1377,6 @@ static bool construct(
goto fail;
}
}
- pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
- if (pool->base.clk_mgr == NULL) {
- dm_error("DC: failed to create display clock!\n");
- BREAK_TO_DEBUGGER();
- goto fail;
- }
pool->base.dmcu = dcn10_dmcu_create(ctx,
&dmcu_regs,
@@ -1374,7 +1402,6 @@ static bool construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
if (dc->ctx->dce_version == DCN_VERSION_1_01) {
struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
struct dcn_ip_params *dcn_ip = dc->dcn_ip;
@@ -1385,7 +1412,6 @@ static bool construct(
dcn_soc->dram_clock_change_latency = 23;
dcn_ip->max_num_dpp = 3;
}
-#endif
if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
dc->dcn_soc->urgent_latency = 3;
dc->debug.disable_dmcu = true;
@@ -1410,6 +1436,13 @@ static bool construct(
pool->base.pp_smu = dcn10_pp_smu_create(ctx);
+ pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
+ if (pool->base.clk_mgr == NULL) {
+ dm_error("DC: failed to create display clock!\n");
+ BREAK_TO_DEBUGGER();
+ goto fail;
+ }
+
if (!dc->debug.disable_pplib_clock_request)
dcn_bw_update_from_pplib(dc);
dcn_bw_sync_calcs_and_dml(dc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
index 999c684a0b36..633025ccb870 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
@@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool(
const struct dc_init_data *init_data,
struct dc *dc);
+struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
#endif /* __DC_RESOURCE_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 8ee9f6dc1d62..ba71b5224e7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -415,6 +415,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
case COLOR_SPACE_APPCTRL:
case COLOR_SPACE_CUSTOMPOINTS:
case COLOR_SPACE_UNKNOWN:
+ case COLOR_SPACE_YCBCR709_BLACK:
/* do nothing */
break;
}
@@ -726,11 +727,9 @@ void enc1_stream_encoder_update_dp_info_packets(
3, /* packetIndex */
&info_frame->hdrsmd);
- if (info_frame->dpsdp.valid)
- enc1_update_generic_info_packet(
- enc1,
- 4,/* packetIndex */
- &info_frame->dpsdp);
+ /* packetIndex 4 is used for send immediate sdp message, and please
+ * use other packetIndex (such as 5,6) for other info packet
+ */
/* enable/disable transmission of packet(s).
* If enabled, packet transmission begins on the next frame
@@ -738,7 +737,101 @@ void enc1_stream_encoder_update_dp_info_packets(
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
- REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid);
+
+
+ /* This bit is the master enable bit.
+ * When enabling secondary stream engine,
+ * this master bit must also be set.
+ * This register shared with audio info frame.
+ * Therefore we need to enable master bit
+ * if at least on of the fields is not 0
+ */
+ value = REG_READ(DP_SEC_CNTL);
+ if (value)
+ REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_send_immediate_sdp_message(
+ struct stream_encoder *enc,
+ const uint8_t *custom_sdp_message,
+ unsigned int sdp_message_size)
+{
+ struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+ uint32_t value = 0;
+
+ /* TODOFPGA Figure out a proper number for max_retries polling for lock
+ * use 50 for now.
+ */
+ uint32_t max_retries = 50;
+
+ /* check if GSP4 is transmitted */
+ REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING,
+ 0, 10, max_retries);
+
+ /* disable GSP4 transmitting */
+ REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0);
+
+ /* transmit GSP4 at the earliest time in a frame */
+ REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1);
+
+ /*we need turn on clock before programming AFMT block*/
+ REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
+
+ /* check if HW reading GSP memory */
+ REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
+ 0, 10, max_retries);
+
+ /* HW does is not reading GSP memory not reading too long ->
+ * something wrong. clear GPS memory access and notify?
+ * hw SW is writing to GSP memory
+ */
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
+
+ /* use generic packet 4 for immediate sdp message */
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
+ AFMT_GENERIC_INDEX, 4);
+
+ /* write generic packet header
+ * (4th byte is for GENERIC0 only)
+ */
+ REG_SET_4(AFMT_GENERIC_HDR, 0,
+ AFMT_GENERIC_HB0, custom_sdp_message[0],
+ AFMT_GENERIC_HB1, custom_sdp_message[1],
+ AFMT_GENERIC_HB2, custom_sdp_message[2],
+ AFMT_GENERIC_HB3, custom_sdp_message[3]);
+
+ /* write generic packet contents
+ * (we never use last 4 bytes)
+ * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers
+ */
+ {
+ const uint32_t *content =
+ (const uint32_t *) &custom_sdp_message[4];
+
+ REG_WRITE(AFMT_GENERIC_0, *content++);
+ REG_WRITE(AFMT_GENERIC_1, *content++);
+ REG_WRITE(AFMT_GENERIC_2, *content++);
+ REG_WRITE(AFMT_GENERIC_3, *content++);
+ REG_WRITE(AFMT_GENERIC_4, *content++);
+ REG_WRITE(AFMT_GENERIC_5, *content++);
+ REG_WRITE(AFMT_GENERIC_6, *content++);
+ REG_WRITE(AFMT_GENERIC_7, *content);
+ }
+
+ /* check whether GENERIC4 registers double buffer update in immediate mode
+ * is pending
+ */
+ REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING,
+ 0, 10, max_retries);
+
+ /* atomically update double-buffered GENERIC4 registers in immediate mode
+ * (update immediately)
+ */
+ REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+ AFMT_GENERIC4_IMMEDIATE_UPDATE, 1);
+
+ /* enable GSP4 transmitting */
+ REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1);
/* This bit is the master enable bit.
* When enabling secondary stream engine,
@@ -1462,6 +1555,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
enc1_stream_encoder_stop_hdmi_info_packets,
.update_dp_info_packets =
enc1_stream_encoder_update_dp_info_packets,
+ .send_immediate_sdp_message =
+ enc1_stream_encoder_send_immediate_sdp_message,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
.dp_blank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index e654c2f55971..a292b106a8b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -81,6 +81,7 @@
SRI(DP_MSE_RATE_UPDATE, DP, id), \
SRI(DP_PIXEL_FORMAT, DP, id), \
SRI(DP_SEC_CNTL, DP, id), \
+ SRI(DP_SEC_CNTL2, DP, id), \
SRI(DP_STEER_FIFO, DP, id), \
SRI(DP_VID_M, DP, id), \
SRI(DP_VID_N, DP, id), \
@@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers {
uint32_t AFMT_60958_1;
uint32_t AFMT_60958_2;
uint32_t DIG_FE_CNTL;
+ uint32_t DIG_FE_CNTL2;
uint32_t DP_MSE_RATE_CNTL;
uint32_t DP_MSE_RATE_UPDATE;
uint32_t DP_PIXEL_FORMAT;
uint32_t DP_SEC_CNTL;
+ uint32_t DP_SEC_CNTL2;
uint32_t DP_STEER_FIFO;
uint32_t DP_VID_M;
uint32_t DP_VID_N;
@@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers {
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
@@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers {
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\
@@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers {
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\
+ SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\
SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\
@@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers {
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+ SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\
SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
@@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers {
type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\
type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\
type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\
+ type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\
type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\
type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\
type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\
@@ -312,6 +323,7 @@ struct dcn10_stream_enc_registers {
type AFMT_GENERIC2_FRAME_UPDATE;\
type AFMT_GENERIC3_FRAME_UPDATE;\
type AFMT_GENERIC4_FRAME_UPDATE;\
+ type AFMT_GENERIC4_IMMEDIATE_UPDATE;\
type AFMT_GENERIC5_FRAME_UPDATE;\
type AFMT_GENERIC6_FRAME_UPDATE;\
type AFMT_GENERIC7_FRAME_UPDATE;\
@@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers {
type DP_SEC_GSP5_ENABLE;\
type DP_SEC_GSP6_ENABLE;\
type DP_SEC_GSP7_ENABLE;\
+ type DP_SEC_GSP7_PPS;\
type DP_SEC_GSP7_SEND;\
+ type DP_SEC_GSP4_SEND;\
+ type DP_SEC_GSP4_SEND_PENDING;\
+ type DP_SEC_GSP4_LINE_NUM;\
+ type DP_SEC_GSP4_SEND_ANY_LINE;\
type DP_SEC_MPG_ENABLE;\
type DP_VID_STREAM_DIS_DEFER;\
type DP_VID_STREAM_ENABLE;\
@@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets(
struct stream_encoder *enc,
const struct encoder_info_frame *info_frame);
+void enc1_stream_encoder_send_immediate_sdp_message(
+ struct stream_encoder *enc,
+ const uint8_t *custom_sdp_message,
+ unsigned int sdp_message_size);
+
void enc1_stream_encoder_stop_dp_info_packets(
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4fc4208d1472..9f7ebf6a4e40 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -80,6 +80,7 @@ struct pp_smu_funcs_rv {
/* PPSMC_MSG_SetDisplayCount
* 0 triggers S0i2 optimization
*/
+
void (*set_display_count)(struct pp_smu *pp, int count);
/* reader and writer WM's are sent together as part of one table*/
@@ -115,7 +116,6 @@ struct pp_smu_funcs_rv {
/* PME w/a */
void (*set_pme_wa_enable)(struct pp_smu *pp);
-
};
struct pp_smu_funcs {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index c5b791d158a7..6cc59f138095 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -219,6 +219,9 @@ struct _vcs_dpi_display_pipe_source_params_st {
unsigned char xfc_enable;
unsigned char xfc_slave;
struct _vcs_dpi_display_xfc_params_st xfc_params;
+ //for vstartuplines calculation freesync
+ unsigned char v_total_min;
+ unsigned char v_total_max;
};
struct writeback_st {
int wb_src_height;
@@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st {
unsigned char otg_inst;
unsigned char odm_combine;
unsigned char use_maximum_vstartup;
+ unsigned int vtotal_max;
+ unsigned int vtotal_min;
};
struct _vcs_dpi_display_pipe_params_st {
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index c2028c4744a6..a610fae16280 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -84,10 +84,6 @@ bool dal_hw_factory_init(
return true;
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case DCN_VERSION_1_0:
- dal_hw_factory_dcn10_init(factory);
- return true;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
case DCN_VERSION_1_01:
dal_hw_factory_dcn10_init(factory);
return true;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 236ca28784a9..77615146b96e 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -84,11 +84,6 @@ bool dal_hw_translate_init(
dal_hw_translate_dcn10_init(translate);
return true;
#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
- case DCN_VERSION_1_01:
- dal_hw_translate_dcn10_init(translate);
- return true;
-#endif
default:
BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 6f5ab05d6467..539d34d3439c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -123,6 +123,11 @@ struct resource_funcs {
enum dc_status (*get_default_swizzle_mode)(
struct dc_plane_state *plane_state);
+ struct stream_encoder *(*find_first_free_match_stream_enc_for_link)(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream);
+
};
struct audio_support{
@@ -212,6 +217,25 @@ struct plane_resource {
struct dcn_fe_bandwidth bw;
};
+union pipe_update_flags {
+ struct {
+ uint32_t enable : 1;
+ uint32_t disable : 1;
+ uint32_t odm : 1;
+ uint32_t global_sync : 1;
+ uint32_t opp_changed : 1;
+ uint32_t tg_changed : 1;
+ uint32_t mpcc : 1;
+ uint32_t dppclk : 1;
+ uint32_t hubp_interdependent : 1;
+ uint32_t hubp_rq_dlg_ttu : 1;
+ uint32_t gamut_remap : 1;
+ uint32_t scaler : 1;
+ uint32_t viewport : 1;
+ } bits;
+ uint32_t raw;
+};
+
struct pipe_ctx {
struct dc_plane_state *plane_state;
struct dc_stream_state *stream;
@@ -234,6 +258,7 @@ struct pipe_ctx {
struct _vcs_dpi_display_rq_regs_st rq_regs;
struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;
#endif
+ union pipe_update_flags update_flags;
};
struct resource_context {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 31bd6d5183ab..f3fd3f8cac26 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -31,7 +31,7 @@
struct clk_mgr {
struct dc_context *ctx;
- const struct clk_mgr_funcs *funcs;
+ struct clk_mgr_funcs *funcs;
struct dc_clocks clks;
};
@@ -44,6 +44,12 @@ struct clk_mgr_funcs {
int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr);
void (*init_clocks)(struct clk_mgr *clk_mgr);
+
+ /* Returns actual clk that's set */
+ int (*set_dispclk)(struct clk_mgr *clk_mgr, int requested_dispclk_khz);
+ int (*set_dprefclk)(struct clk_mgr *clk_mgr);
};
+
+
#endif /* __DAL_CLK_MGR_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index c9d3e37e9531..ca162079a41b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -59,6 +59,7 @@ struct encoder_feature_support {
uint32_t IS_TPS3_CAPABLE:1;
uint32_t IS_TPS4_CAPABLE:1;
uint32_t HDMI_6GB_EN:1;
+ uint32_t DP_IS_USB_C:1;
} bits;
uint32_t raw;
} flags;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 49854eb73d1d..537563888f87 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -63,8 +63,6 @@ struct encoder_info_frame {
struct dc_info_packet vsc;
/* HDR Static MetaData */
struct dc_info_packet hdrsmd;
- /* custom sdp message */
- struct dc_info_packet dpsdp;
};
struct encoder_unblank_param {
@@ -123,6 +121,11 @@ struct stream_encoder_funcs {
struct stream_encoder *enc,
const struct encoder_info_frame *info_frame);
+ void (*send_immediate_sdp_message)(
+ struct stream_encoder *enc,
+ const uint8_t *custom_sdp_message,
+ unsigned int sdp_message_size);
+
void (*stop_dp_info_packets)(
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 067d53caf28a..0b8c6896581f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -70,14 +70,6 @@ enum crtc_state {
CRTC_STATE_VACTIVE
};
-struct _dlg_otg_param {
- int vstartup_start;
- int vupdate_offset;
- int vupdate_width;
- int vready_offset;
- enum signal_type signal;
-};
-
struct vupdate_keepout_params {
int start_offset;
int end_offset;
@@ -126,7 +118,6 @@ struct timing_generator {
const struct timing_generator_funcs *funcs;
struct dc_bios *bp;
struct dc_context *ctx;
- struct _dlg_otg_param dlg_otg_param;
int inst;
};
@@ -140,7 +131,13 @@ struct timing_generator_funcs {
const struct dc_crtc_timing *timing);
void (*program_timing)(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
- bool use_vbios);
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width,
+ const enum signal_type signal,
+ bool use_vbios
+ );
void (*setup_vertical_interrupt0)(
struct timing_generator *optc,
uint32_t start_line,
@@ -210,7 +207,11 @@ struct timing_generator_funcs {
bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width);
- void (*program_global_sync)(struct timing_generator *tg);
+ void (*program_global_sync)(struct timing_generator *tg,
+ int vready_offset,
+ int vstartup_start,
+ int vupdate_offset,
+ int vupdate_width);
void (*enable_optc_clock)(struct timing_generator *tg, bool enable);
void (*program_stereo)(struct timing_generator *tg,
const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
@@ -237,6 +238,8 @@ struct timing_generator_funcs {
bool (*get_crc)(struct timing_generator *tg,
uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
+ void (*set_vtg_params)(struct timing_generator *optc,
+ const struct dc_crtc_timing *dc_crtc_timing);
};
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 33905468e2b9..eb1c12ed026a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -158,6 +158,11 @@ struct hw_sequencer_funcs {
void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
+ void (*send_immediate_sdp_message)(
+ struct pipe_ctx *pipe_ctx,
+ const uint8_t *custom_sdp_message,
+ unsigned int sdp_message_size);
+
void (*enable_stream)(struct pipe_ctx *pipe_ctx);
void (*disable_stream)(struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
index 01bf01a34a08..c30437ae8395 100644
--- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h
+++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
@@ -307,7 +307,8 @@ struct bp_encoder_cap_info {
uint32_t DP_HBR2_EN:1;
uint32_t DP_HBR3_EN:1;
uint32_t HDMI_6GB_EN:1;
- uint32_t RESERVED:30;
+ uint32_t DP_IS_USB_C:1;
+ uint32_t RESERVED:27;
};
#endif /*__DAL_BIOS_PARSER_TYPES_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 4c8ce7938f01..63c3e77159d9 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -131,21 +131,18 @@
#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */
#define RAVEN_A0 0x01
#define RAVEN_B0 0x21
-#define PICASSO_A0 0x41
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
/* DCN1_01 */
+#define PICASSO_A0 0x41
#define RAVEN2_A0 0x81
-#endif
+#define RAVEN1_F0 0xF0
#define RAVEN_UNKNOWN 0xFF
#define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
#define RAVEN1_F0 0xF0
#define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0))
-#endif /* DCN1_01 */
#define FAMILY_RV 142 /* DCN 1*/
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index f5bd869d4320..dabdbc0999d4 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -45,9 +45,7 @@ enum dce_version {
DCE_VERSION_12_1,
DCE_VERSION_MAX,
DCN_VERSION_1_0,
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
DCN_VERSION_1_01,
-#endif /* DCN1_01 */
DCN_VERSION_MAX
};
diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h
index 2b836e582c08..845fea8a387f 100644
--- a/drivers/gpu/drm/amd/display/include/set_mode_types.h
+++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h
@@ -84,7 +84,10 @@ union hdmi_info_packet {
uint16_t bar_left;
uint16_t bar_right;
- uint8_t reserved[14];
+ uint8_t F140_F143:4;
+ uint8_t ACE0_ACE3:4;
+
+ uint8_t reserved[13];
} bits;
struct info_packet_raw_data packet_raw_data;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index a1055413bade..8601d371776e 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -240,16 +240,27 @@ struct dividers {
struct fixed31_32 divider3;
};
-static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4)
+enum gamma_type_index {
+ gamma_type_index_2_4,
+ gamma_type_index_2_2,
+ gamma_type_index_2_2_flat
+};
+
+static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type)
{
- static const int32_t numerator01[] = { 31308, 180000};
- static const int32_t numerator02[] = { 12920, 4500};
- static const int32_t numerator03[] = { 55, 99};
- static const int32_t numerator04[] = { 55, 99};
- static const int32_t numerator05[] = { 2400, 2200};
+ static const int32_t numerator01[] = { 31308, 180000, 0};
+ static const int32_t numerator02[] = { 12920, 4500, 0};
+ static const int32_t numerator03[] = { 55, 99, 0};
+ static const int32_t numerator04[] = { 55, 99, 0};
+ static const int32_t numerator05[] = { 2400, 2200, 2200};
uint32_t i = 0;
- uint32_t index = is_2_4 == true ? 0:1;
+ uint32_t index = 0;
+
+ if (type == gamma_type_index_2_2)
+ index = 1;
+ else if (type == gamma_type_index_2_2_flat)
+ index = 2;
do {
coefficients->a0[i] = dc_fixpt_from_fraction(
@@ -697,7 +708,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,
static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
uint32_t hw_points_num,
- const struct hw_x_point *coordinate_x, bool is_2_4)
+ const struct hw_x_point *coordinate_x, enum gamma_type_index type)
{
uint32_t i;
@@ -705,7 +716,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
struct pwl_float_data_ex *rgb = rgb_regamma;
const struct hw_x_point *coord_x = coordinate_x;
- build_coefficients(&coeff, is_2_4);
+ build_coefficients(&coeff, type);
i = 0;
@@ -892,13 +903,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
static void build_degamma(struct pwl_float_data_ex *curve,
uint32_t hw_points_num,
- const struct hw_x_point *coordinate_x, bool is_2_4)
+ const struct hw_x_point *coordinate_x, enum gamma_type_index type)
{
uint32_t i;
struct gamma_coefficients coeff;
uint32_t begin_index, end_index;
- build_coefficients(&coeff, is_2_4);
+ build_coefficients(&coeff, type);
i = 0;
/* X points is 2^-25 to 2^7
@@ -1614,7 +1625,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
coordinates_x,
output_tf->sdr_ref_white_level);
} else if (tf == TRANSFER_FUNCTION_GAMMA22 &&
- fs_params != NULL) {
+ fs_params != NULL && fs_params->skip_tm == 0) {
build_freesync_hdr(rgb_regamma,
MAX_HW_POINTS,
coordinates_x,
@@ -1627,7 +1638,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
build_regamma(rgb_regamma,
MAX_HW_POINTS,
- coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false);
+ coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 :
+ tf == TRANSFER_FUNCTION_GAMMA22 ?
+ gamma_type_index_2_2_flat : gamma_type_index_2_2);
}
map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
coordinates_x, axis_x, rgb_regamma,
@@ -1832,7 +1845,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
build_degamma(curve,
MAX_HW_POINTS,
coordinates_x,
- tf == TRANSFER_FUNCTION_SRGB ? true : false);
+ tf == TRANSFER_FUNCTION_SRGB ?
+ gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ?
+ gamma_type_index_2_2_flat : gamma_type_index_2_2);
else if (tf == TRANSFER_FUNCTION_LINEAR) {
// just copy coordinates_x into curve
i = 0;
@@ -1932,7 +1947,10 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
build_regamma(rgb_regamma,
MAX_HW_POINTS,
- coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false);
+ coordinates_x,
+ trans == TRANSFER_FUNCTION_SRGB ?
+ gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
+ gamma_type_index_2_2_flat : gamma_type_index_2_2);
for (i = 0; i <= MAX_HW_POINTS ; i++) {
points->red[i] = rgb_regamma[i].r;
points->green[i] = rgb_regamma[i].g;
@@ -2002,7 +2020,8 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
kvfree(rgb_degamma);
} else if (trans == TRANSFER_FUNCTION_SRGB ||
- trans == TRANSFER_FUNCTION_BT709) {
+ trans == TRANSFER_FUNCTION_BT709 ||
+ trans == TRANSFER_FUNCTION_GAMMA22) {
rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
sizeof(*rgb_degamma),
GFP_KERNEL);
@@ -2011,7 +2030,10 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
build_degamma(rgb_degamma,
MAX_HW_POINTS,
- coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false);
+ coordinates_x,
+ trans == TRANSFER_FUNCTION_SRGB ?
+ gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
+ gamma_type_index_2_2_flat : gamma_type_index_2_2);
for (i = 0; i <= MAX_HW_POINTS ; i++) {
points->red[i] = rgb_degamma[i].r;
points->green[i] = rgb_degamma[i].g;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index a6e164df090a..369953fafadf 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -79,6 +79,7 @@ struct freesync_hdr_tf_params {
unsigned int max_content; // luminance in nits
unsigned int min_display; // luminance in 1/10000 nits
unsigned int max_display; // luminance in nits
+ unsigned int skip_tm; // skip tm
};
void setup_x_points_distribution(void);
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index db06fab2ad5c..bc13c552797f 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
if (stream->psr_version != 0)
vscPacketRevision = 2;
- if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+ /* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */
+ if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
vscPacketRevision = 5;
/* VSC packet not needed based on the features
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index a9575db8d7aa..6efcaa93e17b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -30,4 +30,22 @@
#define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044
#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0
+#define smnPerfMonCtlLo0 0x01d440UL
+#define smnPerfMonCtlHi0 0x01d444UL
+#define smnPerfMonCtlLo1 0x01d450UL
+#define smnPerfMonCtlHi1 0x01d454UL
+#define smnPerfMonCtlLo2 0x01d460UL
+#define smnPerfMonCtlHi2 0x01d464UL
+#define smnPerfMonCtlLo3 0x01d470UL
+#define smnPerfMonCtlHi3 0x01d474UL
+
+#define smnPerfMonCtrLo0 0x01d448UL
+#define smnPerfMonCtrHi0 0x01d44cUL
+#define smnPerfMonCtrLo1 0x01d458UL
+#define smnPerfMonCtrHi1 0x01d45cUL
+#define smnPerfMonCtrLo2 0x01d468UL
+#define smnPerfMonCtrHi2 0x01d46cUL
+#define smnPerfMonCtrLo3 0x01d478UL
+#define smnPerfMonCtrHi3 0x01d47cUL
+
#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index 529b37db274c..f1d048e0ed2c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -829,6 +829,8 @@
#define mmTD_CNTL_BASE_IDX 0
#define mmTD_STATUS 0x0526
#define mmTD_STATUS_BASE_IDX 0
+#define mmTD_EDC_CNT 0x052e
+#define mmTD_EDC_CNT_BASE_IDX 0
#define mmTD_DSM_CNTL 0x052f
#define mmTD_DSM_CNTL_BASE_IDX 0
#define mmTD_DSM_CNTL2 0x0530
@@ -845,6 +847,8 @@
#define mmTA_STATUS_BASE_IDX 0
#define mmTA_SCRATCH 0x0564
#define mmTA_SCRATCH_BASE_IDX 0
+#define mmTA_EDC_CNT 0x0586
+#define mmTA_EDC_CNT_BASE_IDX 0
// addressBlock: gc_gdsdec
@@ -1051,6 +1055,13 @@
#define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX 0
+// addressBlock: gc_ea_gceadec2
+// base address: 0x9c00
+#define mmGCEA_EDC_CNT 0x0706
+#define mmGCEA_EDC_CNT_BASE_IDX 0
+#define mmGCEA_EDC_CNT2 0x0707
+#define mmGCEA_EDC_CNT2_BASE_IDX 0
+
// addressBlock: gc_rmi_rmidec
// base address: 0x9e00
#define mmRMI_GENERAL_CNTL 0x0780
@@ -1709,6 +1720,8 @@
#define mmTC_CFG_L1_VOLATILE_BASE_IDX 0
#define mmTC_CFG_L2_VOLATILE 0x0b23
#define mmTC_CFG_L2_VOLATILE_BASE_IDX 0
+#define mmTCI_EDC_CNT 0x0b60
+#define mmTCI_EDC_CNT_BASE_IDX 0
#define mmTCI_STATUS 0x0b61
#define mmTCI_STATUS_BASE_IDX 0
#define mmTCI_CNTL_1 0x0b62
@@ -2594,6 +2607,24 @@
#define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX 0
#define mmCP_RB_DOORBELL_CLEAR 0x1188
#define mmCP_RB_DOORBELL_CLEAR_BASE_IDX 0
+#define mmCPF_EDC_TAG_CNT 0x1189
+#define mmCPF_EDC_TAG_CNT_BASE_IDX 0
+#define mmCPF_EDC_ROQ_CNT 0x118a
+#define mmCPF_EDC_ROQ_CNT_BASE_IDX 0
+#define mmCPG_EDC_TAG_CNT 0x118b
+#define mmCPG_EDC_TAG_CNT_BASE_IDX 0
+#define mmCPG_EDC_DMA_CNT 0x118d
+#define mmCPG_EDC_DMA_CNT_BASE_IDX 0
+#define mmCPC_EDC_SCRATCH_CNT 0x118e
+#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX 0
+#define mmCPC_EDC_UCODE_CNT 0x118f
+#define mmCPC_EDC_UCODE_CNT_BASE_IDX 0
+#define mmDC_EDC_STATE_CNT 0x1191
+#define mmDC_EDC_STATE_CNT_BASE_IDX 0
+#define mmDC_EDC_CSINVOC_CNT 0x1192
+#define mmDC_EDC_CSINVOC_CNT_BASE_IDX 0
+#define mmDC_EDC_RESTORE_CNT 0x1193
+#define mmDC_EDC_RESTORE_CNT_BASE_IDX 0
#define mmCP_GFX_MQD_CONTROL 0x11a0
#define mmCP_GFX_MQD_CONTROL_BASE_IDX 0
#define mmCP_GFX_MQD_BASE_ADDR 0x11a1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
index 8c75669eb500..9470ec5e0f42 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
@@ -54,5 +54,8 @@
#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258
#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c
+#define smnPCIE_RX_NUM_NAK 0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
+
#endif // _nbio_6_1_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
index 5563f0715896..caf5ffdc130a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
@@ -51,4 +51,7 @@
#define smnPCIE_PERF_COUNT0_TXCLK2 0x11180258
#define smnPCIE_PERF_COUNT1_TXCLK2 0x1118025c
+#define smnPCIE_RX_NUM_NAK 0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
+
#endif // _nbio_7_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
index c1457d880c4d..4bcacf529852 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
@@ -50,4 +50,7 @@
#define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL 0x1118024c
#define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL 0x11180250
+#define smnPCIE_RX_NUM_NAK 0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED 0x1118003c
+
#endif // _nbio_7_4_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h
index 749eab94e335..699e658c3cec 100644
--- a/drivers/gpu/drm/amd/include/cik_structs.h
+++ b/drivers/gpu/drm/amd/include/cik_structs.h
@@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers {
uint32_t reserved_123;
uint32_t reserved_124;
uint32_t reserved_125;
- uint32_t reserved_126;
- uint32_t reserved_127;
+ /* reserved_126,127: repurposed for driver-internal use */
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
};
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b897aca9b4c9..98b9533e672b 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -174,6 +174,7 @@ struct tile_config {
#define ALLOC_MEM_FLAGS_GTT (1 << 1)
#define ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
+#define ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
/*
* Allocation flags attributes/access options.
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba9b685..9f661bf96ed0 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -109,8 +109,12 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_UVD_DCLK,
AMDGPU_PP_SENSOR_VCE_ECCLK,
AMDGPU_PP_SENSOR_GPU_LOAD,
+ AMDGPU_PP_SENSOR_MEM_LOAD,
AMDGPU_PP_SENSOR_GFX_MCLK,
AMDGPU_PP_SENSOR_GPU_TEMP,
+ AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
+ AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ AMDGPU_PP_SENSOR_MEM_TEMP,
AMDGPU_PP_SENSOR_VCE_POWER,
AMDGPU_PP_SENSOR_UVD_POWER,
AMDGPU_PP_SENSOR_GPU_POWER,
@@ -159,6 +163,13 @@ struct pp_states_info {
uint32_t states[16];
};
+enum PP_HWMON_TEMP {
+ PP_TEMP_EDGE = 0,
+ PP_TEMP_JUNCTION,
+ PP_TEMP_MEM,
+ PP_TEMP_MAX
+};
+
#define PP_GROUP_MASK 0xF0000000
#define PP_GROUP_SHIFT 28
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index ceaf4932258d..8b383dbe1cda 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -151,8 +151,7 @@ struct v9_sdma_mqd {
uint32_t reserved_123;
uint32_t reserved_124;
uint32_t reserved_125;
- uint32_t reserved_126;
- uint32_t reserved_127;
+ /* reserved_126,127: repurposed for driver-internal use */
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
};
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index 717fbae1d362..c17613287cd0 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -151,8 +151,7 @@ struct vi_sdma_mqd {
uint32_t reserved_123;
uint32_t reserved_124;
uint32_t reserved_125;
- uint32_t reserved_126;
- uint32_t reserved_127;
+ /* reserved_126,127: repurposed for driver-internal use */
uint32_t sdma_engine_id;
uint32_t sdma_queue_id;
};
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index eec329ab6037..3026c7e2d3ea 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -30,6 +30,36 @@
#include "atom.h"
#include "amd_pcie.h"
+int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version)
+{
+ int ret = 0;
+
+ if (!if_version && !smu_version)
+ return -EINVAL;
+
+ if (if_version) {
+ ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion);
+ if (ret)
+ return ret;
+
+ ret = smu_read_smc_arg(smu, if_version);
+ if (ret)
+ return ret;
+ }
+
+ if (smu_version) {
+ ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion);
+ if (ret)
+ return ret;
+
+ ret = smu_read_smc_arg(smu, smu_version);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
bool gate)
{
@@ -168,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size)
ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;
int ret = 0;
+ if (!smu->pm_enabled)
+ return -EINVAL;
if (header->usStructureSize != size) {
pr_err("pp table size not matched !\n");
return -EIO;
@@ -203,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu)
int ret = 0;
uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32];
+ if (!smu->pm_enabled)
+ return ret;
mutex_lock(&feature->mutex);
bitmap_fill(feature->allowed, SMU_FEATURE_MAX);
mutex_unlock(&feature->mutex);
@@ -314,6 +348,7 @@ static int smu_early_init(void *handle)
struct smu_context *smu = &adev->smu;
smu->adev = adev;
+ smu->pm_enabled = !!amdgpu_dpm;
mutex_init(&smu->mutex);
return smu_set_funcs(adev);
@@ -323,6 +358,9 @@ static int smu_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = &adev->smu;
+
+ if (!smu->pm_enabled)
+ return 0;
mutex_lock(&smu->mutex);
smu_handle_task(&adev->smu,
smu->smu_dpm.dpm_level,
@@ -406,9 +444,6 @@ static int smu_sw_init(void *handle)
struct smu_context *smu = &adev->smu;
int ret;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
smu->pool_size = adev->pm.smu_prv_buffer_size;
smu->smu_feature.feature_num = SMU_FEATURE_MAX;
mutex_init(&smu->smu_feature.mutex);
@@ -460,9 +495,6 @@ static int smu_sw_fini(void *handle)
struct smu_context *smu = &adev->smu;
int ret;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
ret = smu_smc_table_sw_fini(smu);
if (ret) {
pr_err("Failed to sw fini smc table!\n");
@@ -612,10 +644,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
* check if the format_revision in vbios is up to pptable header
* version, and the structure size is not 0.
*/
- ret = smu_get_clk_info_from_vbios(smu);
- if (ret)
- return ret;
-
ret = smu_check_pptable(smu);
if (ret)
return ret;
@@ -716,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
*/
ret = smu_set_tool_table_location(smu);
+ if (!smu_is_dpm_running(smu))
+ pr_info("dpm has been disabled\n");
+
return ret;
}
@@ -788,9 +819,6 @@ static int smu_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = &adev->smu;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
ret = smu_load_microcode(smu);
if (ret)
@@ -831,7 +859,10 @@ static int smu_hw_init(void *handle)
mutex_unlock(&smu->mutex);
- adev->pm.dpm_enabled = true;
+ if (!smu->pm_enabled)
+ adev->pm.dpm_enabled = false;
+ else
+ adev->pm.dpm_enabled = true;
pr_info("SMU is initialized successfully!\n");
@@ -849,9 +880,6 @@ static int smu_hw_fini(void *handle)
struct smu_table_context *table_context = &smu->smu_table;
int ret = 0;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
kfree(table_context->driver_pptable);
table_context->driver_pptable = NULL;
@@ -906,9 +934,6 @@ static int smu_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = &adev->smu;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
ret = smu_system_features_control(smu, false);
if (ret)
return ret;
@@ -924,9 +949,6 @@ static int smu_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = &adev->smu;
- if (!is_support_sw_smu(adev))
- return -EINVAL;
-
pr_info("SMU is resuming...\n");
mutex_lock(&smu->mutex);
@@ -955,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu,
int index = 0;
int num_of_active_display = 0;
- if (!is_support_sw_smu(smu->adev))
+ if (!smu->pm_enabled || !is_support_sw_smu(smu->adev))
return -EINVAL;
if (!display_config)
@@ -1083,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle,
struct smu_context *smu = (struct smu_context*)(handle);
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
- if (!smu_dpm_ctx->dpm_context)
+ if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)
return -EINVAL;
if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) {
@@ -1126,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
long workload;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+ if (!smu->pm_enabled)
+ return -EINVAL;
if (!skip_display_settings) {
ret = smu_display_config_changed(smu);
if (ret) {
@@ -1134,6 +1158,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
}
}
+ if (!smu->pm_enabled)
+ return -EINVAL;
ret = smu_apply_clocks_adjust_rules(smu);
if (ret) {
pr_err("Failed to apply clocks adjust rules!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 70f7f47a2fcf..cc57fb953e62 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -225,7 +225,16 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
{
int ret = 0;
- struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+ struct PP_TemperatureRange range = {
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX};
struct amdgpu_device *adev = hwmgr->adev;
if (hwmgr->hwmgr_func->get_thermal_temperature_range)
@@ -239,6 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
adev->pm.dpm.thermal.min_temp = range.min;
adev->pm.dpm.thermal.max_temp = range.max;
+ adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
+ adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
+ adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+ adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
+ adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+ adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+ adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 048757e8f494..16591be8b0ca 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3532,9 +3532,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
return 0;
case AMDGPU_PP_SENSOR_GPU_LOAD:
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
offset = data->soft_regs_start + smum_get_offsetof(hwmgr,
SMU_SoftRegisters,
- AverageGraphicsActivity);
+ (idx == AMDGPU_PP_SENSOR_GPU_LOAD) ?
+ AverageGraphicsActivity:
+ AverageMemoryActivity);
activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
activity_percent += 0x80;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 384c37875cd0..1d9bb29adaef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -356,6 +356,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
struct vega10_hwmgr *data = hwmgr->backend;
int i;
uint32_t sub_vendor_id, hw_revision;
+ uint32_t top32, bottom32;
struct amdgpu_device *adev = hwmgr->adev;
vega10_initialize_power_tune_defaults(hwmgr);
@@ -499,6 +500,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
(hw_revision == 0) &&
(sub_vendor_id != 0x1002))
data->smu_features[GNLD_PCC_LIMIT].supported = true;
+
+ /* Get the SN to turn into a Unique ID */
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+ top32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+ bottom32 = smum_get_argument(hwmgr);
+
+ adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
#ifdef PPLIB_VEGA10_EVV_SUPPORT
@@ -2267,8 +2276,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1;
pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2;
pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB;
- pp_table->AcgAvfsGb.m1_shift = 0;
- pp_table->AcgAvfsGb.m2_shift = 0;
+ pp_table->AcgAvfsGb.m1_shift = 24;
+ pp_table->AcgAvfsGb.m2_shift = 12;
pp_table->AcgAvfsGb.b_shift = 0;
} else {
@@ -2364,6 +2373,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable)
struct vega10_hwmgr *data = hwmgr->backend;
if (data->smu_features[GNLD_AVFS].supported) {
+ /* Already enabled or disabled */
+ if (!(enable ^ data->smu_features[GNLD_AVFS].enabled))
+ return 0;
+
if (enable) {
PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
true,
@@ -2466,11 +2479,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
return;
}
}
-
- if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
- data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
- data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
- }
}
/**
@@ -3683,6 +3691,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
vega10_update_avfs(hwmgr);
+ /*
+ * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC.
+ * That will help to keep AVFS disabled.
+ */
data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
return 0;
@@ -3785,6 +3797,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
+ *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
+ *((uint32_t *)value) = smum_get_argument(hwmgr) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;
@@ -4852,12 +4876,22 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v2_information *table_info =
- (struct phm_ppt_v2_information *)hwmgr->pptable;
+ struct vega10_hwmgr *data = hwmgr->backend;
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
@@ -4988,13 +5022,70 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
return true;
}
+static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr)
+{
+ struct vega10_hwmgr *data = hwmgr->backend;
+ struct pp_power_state *ps = hwmgr->request_ps;
+ struct vega10_power_state *vega10_ps;
+ struct vega10_single_dpm_table *gfx_dpm_table =
+ &data->dpm_table.gfx_table;
+ struct vega10_single_dpm_table *soc_dpm_table =
+ &data->dpm_table.soc_table;
+ struct vega10_single_dpm_table *mem_dpm_table =
+ &data->dpm_table.mem_table;
+ int max_level;
+
+ if (!ps)
+ return;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+ gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].gfx_clock =
+ gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
+
+ if (vega10_ps->performance_levels[max_level].soc_clock !=
+ soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].soc_clock =
+ soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
+
+ if (vega10_ps->performance_levels[max_level].mem_clock !=
+ mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].mem_clock =
+ mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
+
+ if (!hwmgr->ps)
+ return;
+
+ ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1));
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+ gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].gfx_clock =
+ gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
+
+ if (vega10_ps->performance_levels[max_level].soc_clock !=
+ soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].soc_clock =
+ soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
+
+ if (vega10_ps->performance_levels[max_level].mem_clock !=
+ mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+ vega10_ps->performance_levels[max_level].mem_clock =
+ mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
+}
+
static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
enum PP_OD_DPM_TABLE_COMMAND type)
{
struct vega10_hwmgr *data = hwmgr->backend;
struct phm_ppt_v2_information *table_info = hwmgr->pptable;
struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk;
- struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table;
+ struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table;
struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =
&data->odn_dpm_table.vdd_dep_on_socclk;
@@ -5018,7 +5109,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
break;
}
if (j == od_vddc_lookup_table->count) {
- od_vddc_lookup_table->entries[j-1].us_vdd =
+ j = od_vddc_lookup_table->count - 1;
+ od_vddc_lookup_table->entries[j].us_vdd =
podn_vdd_dep->entries[i].vddc;
data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
}
@@ -5026,25 +5118,38 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
}
dpm_table = &data->dpm_table.soc_table;
for (i = 0; i < dep_table->count; i++) {
- if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd &&
- dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) {
+ if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd &&
+ dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) {
data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
- podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
- dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+ for (; (i < dep_table->count) &&
+ (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) {
+ podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk;
+ dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+ }
+ break;
+ } else {
+ dpm_table->dpm_levels[i].value = dep_table->entries[i].clk;
+ podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc;
+ podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd;
+ podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk;
}
}
if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
- podn_vdd_dep->entries[dep_table->count-1].clk) {
+ podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) {
data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
- podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
- dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk;
+ podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk =
+ podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
+ dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value =
+ podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
}
if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
- podn_vdd_dep->entries[dep_table->count-1].vddInd) {
+ podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) {
data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
- podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd;
+ podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd =
+ podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd;
}
}
+ vega10_odn_update_power_state(hwmgr);
}
static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
@@ -5079,6 +5184,11 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
} else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));
vega10_odn_initial_default_setting(hwmgr);
+ vega10_odn_update_power_state(hwmgr);
+ /* force to update all clock tables */
+ data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK |
+ DPMTABLE_UPDATE_MCLK |
+ DPMTABLE_UPDATE_SOCCLK;
return 0;
} else if (PP_OD_COMMIT_DPM_TABLE == type) {
vega10_check_dpm_table_updated(hwmgr);
@@ -5201,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
{
+ struct amdgpu_device *adev = hwmgr->adev;
+
hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
hwmgr->pptable_func = &vega10_pptable_funcs;
+ if (amdgpu_passthrough(adev))
+ return vega10_baco_set_cap(hwmgr);
return 0;
}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d74dc85..83d22cdeaa29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
return result;
}
+
+int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
+{
+ int result = 0;
+
+ const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
+
+ powerplay_table = get_powerplay_table(hwmgr);
+
+ PP_ASSERT_WITH_CODE((powerplay_table != NULL),
+ "Missing PowerPlay Table!", return -1);
+
+ result = check_powerplay_tables(hwmgr, powerplay_table);
+
+ PP_ASSERT_WITH_CODE((result == 0),
+ "check_powerplay_tables failed", return result);
+
+ set_hw_cap(
+ hwmgr,
+ 0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
+ PHM_PlatformCaps_BACO);
+ return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2af7aa3..da5fbec9b0cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
struct pp_power_state *, void *, uint32_t));
+extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 707cd4b0357f..efb6d3762feb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr)
static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t top32, bottom32;
int i;
data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
((data->registry_data.disallowed_features >> i) & 1) ?
false : true;
}
+
+ /* Get the SN to turn into a Unique ID */
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+ top32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+ bottom32 = smum_get_argument(hwmgr);
+
+ adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -1237,21 +1247,39 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
return (mem_clk * 100);
}
+static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table)
+{
+ struct vega12_hwmgr *data =
+ (struct vega12_hwmgr *)(hwmgr->backend);
+ int ret = 0;
+
+ if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) {
+ ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table,
+ TABLE_SMU_METRICS, true);
+ if (ret) {
+ pr_info("Failed to export SMU metrics table!\n");
+ return ret;
+ }
+ memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t));
+ data->metrics_time = jiffies;
+ } else
+ memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t));
+
+ return ret;
+}
+
static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
{
-#if 0
- uint32_t value;
+ SmuMetrics_t metrics_table;
+ int ret = 0;
- PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
- PPSMC_MSG_GetCurrPkgPwr),
- "Failed to get current package power!",
- return -EINVAL);
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
- value = smum_get_argument(hwmgr);
- /* power value is an integer */
- *query = value << 8;
-#endif
- return 0;
+ *query = metrics_table.CurrSocketPower << 8;
+
+ return ret;
}
static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
@@ -1290,25 +1318,27 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f
static int vega12_get_current_activity_percent(
struct pp_hwmgr *hwmgr,
+ int idx,
uint32_t *activity_percent)
{
+ SmuMetrics_t metrics_table;
int ret = 0;
- uint32_t current_activity = 50;
-#if 0
- ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0);
- if (!ret) {
- current_activity = smum_get_argument(hwmgr);
- if (current_activity > 100) {
- PP_ASSERT(false,
- "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!");
- current_activity = 100;
- }
- } else
- PP_ASSERT(false,
- "[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!");
-#endif
- *activity_percent = current_activity;
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ switch (idx) {
+ case AMDGPU_PP_SENSOR_GPU_LOAD:
+ *activity_percent = metrics_table.AverageGfxActivity;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
+ *activity_percent = metrics_table.AverageUclkActivity;
+ break;
+ default:
+ pr_err("Invalid index for retrieving clock activity\n");
+ return -EINVAL;
+ }
return ret;
}
@@ -1317,6 +1347,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
void *value, int *size)
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+ SmuMetrics_t metrics_table;
int ret = 0;
switch (idx) {
@@ -1331,7 +1362,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_LOAD:
- ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value);
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
+ ret = vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
if (!ret)
*size = 4;
break;
@@ -1339,6 +1371,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHotspot *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHBM *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;
@@ -1349,6 +1399,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
break;
case AMDGPU_PP_SENSOR_GPU_POWER:
ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
+ if (!ret)
+ *size = 4;
break;
case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK:
ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value);
@@ -2526,12 +2578,23 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v3_information *pptable_information =
- (struct phm_ppt_v3_information *)hwmgr->pptable;
+ struct vega12_hwmgr *data =
+ (struct vega12_hwmgr *)(hwmgr->backend);
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = pptable_information->us_software_shutdown_temp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index b3e424d28994..73875399666a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -396,6 +396,9 @@ struct vega12_hwmgr {
/* ---- Gfxoff ---- */
bool gfxoff_controlled_by_driver;
+
+ unsigned long metrics_time;
+ SmuMetrics_t metrics_table;
};
#define VEGA12_DPM2_NEAR_TDP_DEC 10
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 9b9f87b84910..f27c6fbb192e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
if (hwmgr->smu_version < 0x282100)
data->registry_data.disallowed_features |= FEATURE_ECC_MASK;
+ if (!(hwmgr->feature_mask & PP_PCIE_DPM_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK;
+
+ if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK;
+
+ if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK;
+
+ if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DPM_UCLK_MASK;
+
+ if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK;
+
+ if (!(hwmgr->feature_mask & PP_ULV_MASK))
+ data->registry_data.disallowed_features |= FEATURE_ULV_MASK;
+
+ if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK))
+ data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK;
+
data->registry_data.od_state_in_dc_support = 0;
data->registry_data.thermal_support = 1;
data->registry_data.skip_baco_hardware = 0;
@@ -303,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)
static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
{
struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+ struct amdgpu_device *adev = hwmgr->adev;
+ uint32_t top32, bottom32;
int i;
data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -372,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
((data->registry_data.disallowed_features >> i) & 1) ?
false : true;
}
+
+ /* Get the SN to turn into a Unique ID */
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+ top32 = smum_get_argument(hwmgr);
+ smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+ bottom32 = smum_get_argument(hwmgr);
+
+ adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
}
static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -2094,6 +2125,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
}
static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
+ int idx,
uint32_t *activity_percent)
{
int ret = 0;
@@ -2103,7 +2135,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
if (ret)
return ret;
- *activity_percent = metrics_table.AverageGfxActivity;
+ switch (idx) {
+ case AMDGPU_PP_SENSOR_GPU_LOAD:
+ *activity_percent = metrics_table.AverageGfxActivity;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
+ *activity_percent = metrics_table.AverageUclkActivity;
+ break;
+ default:
+ pr_err("Invalid index for retrieving clock activity\n");
+ return -EINVAL;
+ }
return ret;
}
@@ -2134,14 +2176,33 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_LOAD:
- ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value);
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
+ ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
if (!ret)
*size = 4;
break;
- case AMDGPU_PP_SENSOR_GPU_TEMP:
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
*size = 4;
break;
+ case AMDGPU_PP_SENSOR_EDGE_TEMP:
+ ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureEdge *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+ if (ret)
+ return ret;
+
+ *((uint32_t *)value) = metrics_table.TemperatureHBM *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ *size = 4;
+ break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;
@@ -3974,12 +4035,23 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
struct PP_TemperatureRange *thermal_data)
{
- struct phm_ppt_v3_information *pptable_information =
- (struct phm_ppt_v3_information *)hwmgr->pptable;
+ struct vega20_hwmgr *data =
+ (struct vega20_hwmgr *)(hwmgr->backend);
+ PPTable_t *pp_table = &(data->smc_state_table.pp_table);
memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- thermal_data->max = pptable_information->us_software_shutdown_temp *
+ thermal_data->max = pp_table->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_crit_max = pp_table->ThbmLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index c8b168b3413b..3eb1de9ecf73 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -401,8 +401,12 @@ struct smu_context
uint32_t workload_setting[WORKLOAD_POLICY_MAX];
uint32_t power_profile_mode;
uint32_t default_power_profile_mode;
+ bool pm_enabled;
uint32_t smc_if_version;
+
+ unsigned long metrics_time;
+ void *metrics_table;
};
struct pptable_funcs {
@@ -458,6 +462,8 @@ struct pptable_funcs {
uint32_t *mclk_mask,
uint32_t *soc_mask);
int (*set_cpu_power_state)(struct smu_context *smu);
+ int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures);
+ int (*get_ppfeature_status)(struct smu_context *smu, char *buf);
};
struct smu_funcs
@@ -727,7 +733,10 @@ struct smu_funcs
((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0)
#define smu_set_xgmi_pstate(smu, pstate) \
((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0)
-
+#define smu_set_ppfeature_status(smu, ppfeatures) \
+ ((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL)
+#define smu_get_ppfeature_status(smu, buf) \
+ ((smu)->ppt_funcs->get_ppfeature_status ? (smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL)
extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
uint16_t *size, uint8_t *frev, uint8_t *crev,
@@ -767,4 +776,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b
extern int smu_handle_task(struct smu_context *smu,
enum amd_dpm_forced_level level,
enum amd_pp_task task_id);
+int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a99b5cbb113e..a5f2227a3971 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,6 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
struct PP_TemperatureRange {
int min;
int max;
+ int edge_emergency_max;
+ int hotspot_min;
+ int hotspot_crit_max;
+ int hotspot_emergency_max;
+ int mem_min;
+ int mem_crit_max;
+ int mem_emergency_max;
};
struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 201d2b6329ab..3e30768f9e1c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,18 @@
static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
{
- {-273150, 99000},
- { 120000, 120000},
+ {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+ { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
};
static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
{
- {-273150, 99000},
- { 120000, 120000},
+ {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+ { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
};
+#define CTF_OFFSET_EDGE 5
+#define CTF_OFFSET_HOTSPOT 5
+#define CTF_OFFSET_HBM 5
+
#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index aa8d81f4111e..02c965d64256 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -36,6 +36,9 @@
#define smnMP0_FW_INTF 0x30101c0
#define smnMP1_PUB_CTRL 0x3010b14
+#define TEMP_RANGE_MIN (0)
+#define TEMP_RANGE_MAX (80 * 1000)
+
struct smu_11_0_max_sustainable_clocks {
uint32_t display_clock;
uint32_t phy_clock;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 82550a8a3a3f..c5288831aa15 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -41,6 +41,7 @@ enum SMU_MEMBER {
HandshakeDisables = 0,
VoltageChangeTimeout,
AverageGraphicsActivity,
+ AverageMemoryActivity,
PreVBlankGap,
VBlankTimeout,
UcodeLoadStatus,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 92903a4cc4d8..d2eeb6240484 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -223,20 +223,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu)
static int smu_v11_0_check_fw_version(struct smu_context *smu)
{
- uint32_t smu_version = 0xff;
+ uint32_t if_version = 0xff, smu_version = 0xff;
+ uint16_t smu_major;
+ uint8_t smu_minor, smu_debug;
int ret = 0;
- ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion);
+ ret = smu_get_smc_version(smu, &if_version, &smu_version);
if (ret)
- goto err;
+ return ret;
- ret = smu_read_smc_arg(smu, &smu_version);
- if (ret)
- goto err;
+ smu_major = (smu_version >> 16) & 0xffff;
+ smu_minor = (smu_version >> 8) & 0xff;
+ smu_debug = (smu_version >> 0) & 0xff;
+
+ pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n",
+ if_version, smu_version, smu_major, smu_minor, smu_debug);
- if (smu_version != smu->smc_if_version)
+ if (if_version != smu->smc_if_version) {
+ pr_err("SMU driver if version not matched\n");
ret = -EINVAL;
-err:
+ }
+
return ret;
}
@@ -353,6 +360,8 @@ static int smu_v11_0_init_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
+ if (!smu->pm_enabled)
+ return 0;
if (smu_power->power_context || smu_power->power_context_size != 0)
return -EINVAL;
@@ -362,6 +371,13 @@ static int smu_v11_0_init_power(struct smu_context *smu)
return -ENOMEM;
smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context);
+ smu->metrics_time = 0;
+ smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+ if (!smu->metrics_table) {
+ kfree(smu_power->power_context);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -369,10 +385,14 @@ static int smu_v11_0_fini_power(struct smu_context *smu)
{
struct smu_power_context *smu_power = &smu->smu_power;
+ if (!smu->pm_enabled)
+ return 0;
if (!smu_power->power_context || smu_power->power_context_size == 0)
return -EINVAL;
+ kfree(smu->metrics_table);
kfree(smu_power->power_context);
+ smu->metrics_table = NULL;
smu_power->power_context = NULL;
smu_power->power_context_size = 0;
@@ -634,6 +654,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
{
struct smu_table_context *table_context = &smu->smu_table;
+ if (!smu->pm_enabled)
+ return 0;
if (!table_context)
return -EINVAL;
@@ -662,6 +684,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
static int smu_v11_0_init_display(struct smu_context *smu)
{
int ret = 0;
+
+ if (!smu->pm_enabled)
+ return ret;
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0);
return ret;
}
@@ -671,6 +696,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32
uint32_t feature_low = 0, feature_high = 0;
int ret = 0;
+ if (!smu->pm_enabled)
+ return ret;
if (feature_id >= 0 && feature_id < 31)
feature_low = (1 << feature_id);
else if (feature_id > 31 && feature_id < 63)
@@ -777,10 +804,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu,
uint32_t feature_mask[2];
int ret = 0;
- ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
- SMU_MSG_DisableAllSmuFeatures));
- if (ret)
- return ret;
+ if (smu->pm_enabled) {
+ ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
+ SMU_MSG_DisableAllSmuFeatures));
+ if (ret)
+ return ret;
+ }
+
ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
if (ret)
return ret;
@@ -797,6 +827,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu)
{
int ret = 0;
+ if (!smu->pm_enabled)
+ return ret;
if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT))
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1);
@@ -809,6 +841,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
{
int ret = 0;
+ if (!smu->pm_enabled)
+ return ret;
ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq,
clock_select << 16);
if (ret) {
@@ -995,9 +1029,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_
static int smu_v11_0_get_thermal_range(struct smu_context *smu,
struct PP_TemperatureRange *range)
{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
- range->max = smu->smu_table.software_shutdown_temp *
+ range->max = pptable->TedgeLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ range->hotspot_crit_max = pptable->ThotspotLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ range->mem_crit_max = pptable->ThbmLimit *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)*
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
return 0;
@@ -1062,9 +1107,20 @@ static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu)
static int smu_v11_0_start_thermal_control(struct smu_context *smu)
{
int ret = 0;
- struct PP_TemperatureRange range;
+ struct PP_TemperatureRange range = {
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MIN,
+ TEMP_RANGE_MAX,
+ TEMP_RANGE_MAX};
struct amdgpu_device *adev = smu->adev;
+ if (!smu->pm_enabled)
+ return ret;
smu_v11_0_get_thermal_range(smu, &range);
if (smu->smu_table.thermal_controller_type) {
@@ -1082,11 +1138,39 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
adev->pm.dpm.thermal.min_temp = range.min;
adev->pm.dpm.thermal.max_temp = range.max;
+ adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
+ adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
+ adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+ adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
+ adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+ adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+ adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
+
+ return ret;
+}
+
+static int smu_v11_0_get_metrics_table(struct smu_context *smu,
+ SmuMetrics_t *metrics_table)
+{
+ int ret = 0;
+
+ if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) {
+ ret = smu_update_table(smu, TABLE_SMU_METRICS,
+ (void *)metrics_table, false);
+ if (ret) {
+ pr_info("Failed to export SMU metrics table!\n");
+ return ret;
+ }
+ memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t));
+ smu->metrics_time = jiffies;
+ } else
+ memcpy(metrics_table, smu->metrics_table, sizeof(SmuMetrics_t));
return ret;
}
static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
+ enum amd_pp_sensors sensor,
uint32_t *value)
{
int ret = 0;
@@ -1095,31 +1179,64 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
if (!value)
return -EINVAL;
- ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false);
+ ret = smu_v11_0_get_metrics_table(smu, &metrics);
if (ret)
return ret;
- *value = metrics.AverageGfxActivity;
+ switch (sensor) {
+ case AMDGPU_PP_SENSOR_GPU_LOAD:
+ *value = metrics.AverageGfxActivity;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
+ *value = metrics.AverageUclkActivity;
+ break;
+ default:
+ pr_err("Invalid sensor for retrieving clock activity\n");
+ return -EINVAL;
+ }
return 0;
}
-static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value)
+static int smu_v11_0_thermal_get_temperature(struct smu_context *smu,
+ enum amd_pp_sensors sensor,
+ uint32_t *value)
{
struct amdgpu_device *adev = smu->adev;
+ SmuMetrics_t metrics;
uint32_t temp = 0;
+ int ret = 0;
if (!value)
return -EINVAL;
- temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
- temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
- CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
+ ret = smu_v11_0_get_metrics_table(smu, &metrics);
+ if (ret)
+ return ret;
- temp = temp & 0x1ff;
- temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ switch (sensor) {
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
+ temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
+ CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
+
+ temp = temp & 0x1ff;
+ temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES;
- *value = temp;
+ *value = temp;
+ break;
+ case AMDGPU_PP_SENSOR_EDGE_TEMP:
+ *value = metrics.TemperatureEdge *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ break;
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ *value = metrics.TemperatureHBM *
+ PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ break;
+ default:
+ pr_err("Invalid sensor for retrieving temp\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -1132,7 +1249,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value)
if (!value)
return -EINVAL;
- ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false);
+ ret = smu_v11_0_get_metrics_table(smu, &metrics);
if (ret)
return ret;
@@ -1174,7 +1291,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
int ret = 0;
switch (sensor) {
case AMDGPU_PP_SENSOR_GPU_LOAD:
+ case AMDGPU_PP_SENSOR_MEM_LOAD:
ret = smu_v11_0_get_current_activity_percent(smu,
+ sensor,
(uint32_t *)data);
*size = 4;
break;
@@ -1186,8 +1305,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data);
*size = 4;
break;
- case AMDGPU_PP_SENSOR_GPU_TEMP:
- ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data);
+ case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+ case AMDGPU_PP_SENSOR_EDGE_TEMP:
+ case AMDGPU_PP_SENSOR_MEM_TEMP:
+ ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data);
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_POWER:
@@ -1235,6 +1356,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
PPCLK_e clk_select = 0;
uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
+ if (!smu->pm_enabled)
+ return -EINVAL;
if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) {
switch (clk_type) {
case amd_pp_dcef_clock:
@@ -1518,7 +1641,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf)
"PD_Data_error_rate_coeff"};
int result = 0;
- if (!buf)
+ if (!smu->pm_enabled || !buf)
return -EINVAL;
size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
@@ -1605,6 +1728,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input
smu->power_profile_mode = input[size];
+ if (!smu->pm_enabled)
+ return ret;
if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);
return -EINVAL;
@@ -1710,24 +1835,24 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu,
static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
{
- if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))
+ if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))
return 0;
- if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))
+ if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))
return 0;
- return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);
+ return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);
}
static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable)
{
- if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))
+ if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))
return 0;
- if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))
+ if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))
return 0;
- return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);
+ return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);
}
static int smu_v11_0_get_current_rpm(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 669bd0c2a16c..9ef57fcf7e78 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -2254,6 +2254,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU7_SoftRegisters, AverageGraphicsA);
+ case AverageMemoryActivity:
+ return offsetof(SMU7_SoftRegisters, AverageMemoryA);
case PreVBlankGap:
return offsetof(SMU7_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index bc8375cbf297..0ce85b73338e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity);
+ case AverageMemoryActivity:
+ return offsetof(SMU73_SoftRegisters, AverageMemoryActivity);
case PreVBlankGap:
return offsetof(SMU73_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index 375ccf6ff5f2..f24f13d77808 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -2219,6 +2219,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity);
+ case AverageMemoryActivity:
+ return offsetof(SMU71_SoftRegisters, AverageMemoryActivity);
case PreVBlankGap:
return offsetof(SMU71_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2d4cfe14f72e..0d8958e71b94 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -2313,6 +2313,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity);
+ case AverageMemoryActivity:
+ return offsetof(SMU74_SoftRegisters, AverageMemoryActivity);
case PreVBlankGap:
return offsetof(SMU74_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 3ed6c5f1e5cf..060c0f7f5238 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -2611,6 +2611,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
+ case AverageMemoryActivity:
+ return offsetof(SMU72_SoftRegisters, AverageMemoryActivity);
case PreVBlankGap:
return offsetof(SMU72_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index ddb801517667..1eaf0fa28ef7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;
priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t);
+ /* allocate space for SMU_METRICS table */
+ ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
+ sizeof(SmuMetrics_t),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+ &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+ &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
+ if (ret)
+ goto err4;
+
+ priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01;
+ priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t);
+
return 0;
+err4:
+ amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
+ &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
+ &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
err3:
amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,
&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr,
@@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)
amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
&priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
&priv->smu_tables.entry[TABLE_OVERDRIVE].table);
+ amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+ &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+ &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
kfree(hwmgr->smu_backend);
hwmgr->smu_backend = NULL;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index 1e69300f6175..d499204b2184 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
case AverageGraphicsActivity:
return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
+ case AverageMemoryActivity:
+ return offsetof(SMU75_SoftRegisters, AverageMemoryActivity);
case PreVBlankGap:
return offsetof(SMU75_SoftRegisters, PreVBlankGap);
case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 8fafcbdb1dfd..4aa8f5a69c4c 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu,
return ret;
}
+static int vega20_get_enabled_smc_features(struct smu_context *smu,
+ uint64_t *features_enabled)
+{
+ uint32_t feature_mask[2] = {0, 0};
+ int ret = 0;
+
+ ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
+ if (ret)
+ return ret;
+
+ *features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) |
+ (((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK));
+
+ return ret;
+}
+
+static int vega20_enable_smc_features(struct smu_context *smu,
+ bool enable, uint64_t feature_mask)
+{
+ uint32_t smu_features_low, smu_features_high;
+ int ret = 0;
+
+ smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT);
+ smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT);
+
+ if (enable) {
+ ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow,
+ smu_features_low);
+ if (ret)
+ return ret;
+ ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh,
+ smu_features_high);
+ if (ret)
+ return ret;
+ } else {
+ ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow,
+ smu_features_low);
+ if (ret)
+ return ret;
+ ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh,
+ smu_features_high);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+}
+
+static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf)
+{
+ static const char *ppfeature_name[] = {
+ "DPM_PREFETCHER",
+ "GFXCLK_DPM",
+ "UCLK_DPM",
+ "SOCCLK_DPM",
+ "UVD_DPM",
+ "VCE_DPM",
+ "ULV",
+ "MP0CLK_DPM",
+ "LINK_DPM",
+ "DCEFCLK_DPM",
+ "GFXCLK_DS",
+ "SOCCLK_DS",
+ "LCLK_DS",
+ "PPT",
+ "TDC",
+ "THERMAL",
+ "GFX_PER_CU_CG",
+ "RM",
+ "DCEFCLK_DS",
+ "ACDC",
+ "VR0HOT",
+ "VR1HOT",
+ "FW_CTF",
+ "LED_DISPLAY",
+ "FAN_CONTROL",
+ "GFX_EDC",
+ "GFXOFF",
+ "CG",
+ "FCLK_DPM",
+ "FCLK_DS",
+ "MP1CLK_DS",
+ "MP0CLK_DS",
+ "XGMI",
+ "ECC"};
+ static const char *output_title[] = {
+ "FEATURES",
+ "BITMASK",
+ "ENABLEMENT"};
+ uint64_t features_enabled;
+ int i;
+ int ret = 0;
+ int size = 0;
+
+ ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+ if (ret)
+ return ret;
+
+ size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled);
+ size += sprintf(buf + size, "%-19s %-22s %s\n",
+ output_title[0],
+ output_title[1],
+ output_title[2]);
+ for (i = 0; i < GNLD_FEATURES_MAX; i++) {
+ size += sprintf(buf + size, "%-19s 0x%016llx %6s\n",
+ ppfeature_name[i],
+ 1ULL << i,
+ (features_enabled & (1ULL << i)) ? "Y" : "N");
+ }
+
+ return size;
+}
+
+static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks)
+{
+ uint64_t features_enabled;
+ uint64_t features_to_enable;
+ uint64_t features_to_disable;
+ int ret = 0;
+
+ if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX))
+ return -EINVAL;
+
+ ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+ if (ret)
+ return ret;
+
+ features_to_disable =
+ features_enabled & ~new_ppfeature_masks;
+ features_to_enable =
+ ~features_enabled & new_ppfeature_masks;
+
+ pr_debug("features_to_disable 0x%llx\n", features_to_disable);
+ pr_debug("features_to_enable 0x%llx\n", features_to_enable);
+
+ if (features_to_disable) {
+ ret = vega20_enable_smc_features(smu, false, features_to_disable);
+ if (ret)
+ return ret;
+ }
+
+ if (features_to_enable) {
+ ret = vega20_enable_smc_features(smu, true, features_to_enable);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static const struct pptable_funcs vega20_ppt_funcs = {
.alloc_dpm_context = vega20_allocate_dpm_context,
.store_powerplay_table = vega20_store_powerplay_table,
@@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = {
.unforce_dpm_levels = vega20_unforce_dpm_levels,
.upload_dpm_level = vega20_upload_dpm_level,
.get_profiling_clk_mask = vega20_get_profiling_clk_mask,
+ .set_ppfeature_status = vega20_set_ppfeature_status,
+ .get_ppfeature_status = vega20_get_ppfeature_status,
};
void vega20_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
index 5a0d2af63173..87f3a8303645 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
@@ -36,6 +36,50 @@
#define AVFS_CURVE 0
#define OD8_HOTCURVE_TEMPERATURE 85
+#define SMU_FEATURES_LOW_MASK 0x00000000FFFFFFFF
+#define SMU_FEATURES_LOW_SHIFT 0
+#define SMU_FEATURES_HIGH_MASK 0xFFFFFFFF00000000
+#define SMU_FEATURES_HIGH_SHIFT 32
+
+enum {
+ GNLD_DPM_PREFETCHER = 0,
+ GNLD_DPM_GFXCLK,
+ GNLD_DPM_UCLK,
+ GNLD_DPM_SOCCLK,
+ GNLD_DPM_UVD,
+ GNLD_DPM_VCE,
+ GNLD_ULV,
+ GNLD_DPM_MP0CLK,
+ GNLD_DPM_LINK,
+ GNLD_DPM_DCEFCLK,
+ GNLD_DS_GFXCLK,
+ GNLD_DS_SOCCLK,
+ GNLD_DS_LCLK,
+ GNLD_PPT,
+ GNLD_TDC,
+ GNLD_THERMAL,
+ GNLD_GFX_PER_CU_CG,
+ GNLD_RM,
+ GNLD_DS_DCEFCLK,
+ GNLD_ACDC,
+ GNLD_VR0HOT,
+ GNLD_VR1HOT,
+ GNLD_FW_CTF,
+ GNLD_LED_DISPLAY,
+ GNLD_FAN_CONTROL,
+ GNLD_DIDT,
+ GNLD_GFXOFF,
+ GNLD_CG,
+ GNLD_DPM_FCLK,
+ GNLD_DS_FCLK,
+ GNLD_DS_MP1CLK,
+ GNLD_DS_MP0CLK,
+ GNLD_XGMI,
+ GNLD_ECC,
+
+ GNLD_FEATURES_MAX
+};
+
struct vega20_dpm_level {
bool enabled;
uint32_t value;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index dc067ed0b72d..070d1bc7e725 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args {
};
/* For kfd_ioctl_create_queue_args.queue_type. */
-#define KFD_IOC_QUEUE_TYPE_COMPUTE 0
-#define KFD_IOC_QUEUE_TYPE_SDMA 1
-#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 2
+#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
+#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
+#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15
@@ -338,6 +339,7 @@ struct kfd_ioctl_acquire_vm_args {
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
+#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
/* Allocation flags: attributes/access options */
#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
@@ -408,6 +410,21 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
__u32 n_success; /* to/from KFD */
};
+/* Allocate GWS for specific queue
+ *
+ * @gpu_id: device identifier
+ * @queue_id: queue's id that GWS is allocated for
+ * @num_gws: how many GWS to allocate
+ * @first_gws: index of the first GWS allocated.
+ * only support contiguous GWS allocation
+ */
+struct kfd_ioctl_alloc_queue_gws_args {
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_id; /* to KFD */
+ __u32 num_gws; /* to KFD */
+ __u32 first_gws; /* from KFD */
+};
+
struct kfd_ioctl_get_dmabuf_info_args {
__u64 size; /* from KFD */
__u64 metadata_ptr; /* to KFD */
@@ -426,6 +443,13 @@ struct kfd_ioctl_import_dmabuf_args {
__u32 dmabuf_fd; /* to KFD */
};
+/* Register offset inside the remapped mmio page
+ */
+enum kfd_mmio_remap {
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
+ KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
+};
+
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -520,7 +544,10 @@ struct kfd_ioctl_import_dmabuf_args {
#define AMDKFD_IOC_IMPORT_DMABUF \
AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
+ AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x1E
+#define AMDKFD_COMMAND_END 0x1F
#endif